[llvm-branch-commits] [flang] [flang][OpenMP] Basic mapping of `do concurrent ... reduce` to OpenMP (PR #146033)
Kareem Ergawy via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jun 29 22:28:17 PDT 2025
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/146033
>From f7837d52f95340e627852ebaa34407ac318156c5 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 27 Jun 2025 00:58:08 -0500
Subject: [PATCH 1/2] [flang][OpenMP] Basic mapping of `do concurrent ...
reduce` to OpenMP
Now that we have the changes introduced by #145837, mapping reductions from
`do concurrent` to OpenMP is almost trivial. This PR adds such a mapping.
---
.../OpenMP/DoConcurrentConversion.cpp | 83 +++++++++++++------
.../Transforms/DoConcurrent/reduce_add.mlir | 73 ++++++++++++++++
.../Transforms/DoConcurrent/reduce_local.mlir | 83 +++++++++++++++++++
3 files changed, 212 insertions(+), 27 deletions(-)
create mode 100644 flang/test/Transforms/DoConcurrent/reduce_add.mlir
create mode 100644 flang/test/Transforms/DoConcurrent/reduce_local.mlir
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 709cf1d0938fa..31076f6eb328f 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -312,6 +312,19 @@ class DoConcurrentConversion
bool isComposite) const {
mlir::omp::WsloopOperands wsloopClauseOps;
+ auto cloneFIRRegionToOMP = [&rewriter](mlir::Region &firRegion,
+ mlir::Region &ompRegion) {
+ if (!firRegion.empty()) {
+ rewriter.cloneRegionBefore(firRegion, ompRegion, ompRegion.begin());
+ auto firYield =
+ mlir::cast<fir::YieldOp>(ompRegion.back().getTerminator());
+ rewriter.setInsertionPoint(firYield);
+ rewriter.create<mlir::omp::YieldOp>(firYield.getLoc(),
+ firYield.getOperands());
+ rewriter.eraseOp(firYield);
+ }
+ };
+
// For `local` (and `local_init`) opernads, emit corresponding `private`
// clauses and attach these clauses to the workshare loop.
if (!loop.getLocalVars().empty())
@@ -326,50 +339,65 @@ class DoConcurrentConversion
TODO(localizer.getLoc(),
"local_init conversion is not supported yet");
- auto oldIP = rewriter.saveInsertionPoint();
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointAfter(localizer);
+
auto privatizer = rewriter.create<mlir::omp::PrivateClauseOp>(
localizer.getLoc(), sym.getLeafReference().str() + ".omp",
localizer.getTypeAttr().getValue(),
mlir::omp::DataSharingClauseType::Private);
- if (!localizer.getInitRegion().empty()) {
- rewriter.cloneRegionBefore(localizer.getInitRegion(),
- privatizer.getInitRegion(),
- privatizer.getInitRegion().begin());
- auto firYield = mlir::cast<fir::YieldOp>(
- privatizer.getInitRegion().back().getTerminator());
- rewriter.setInsertionPoint(firYield);
- rewriter.create<mlir::omp::YieldOp>(firYield.getLoc(),
- firYield.getOperands());
- rewriter.eraseOp(firYield);
- }
-
- if (!localizer.getDeallocRegion().empty()) {
- rewriter.cloneRegionBefore(localizer.getDeallocRegion(),
- privatizer.getDeallocRegion(),
- privatizer.getDeallocRegion().begin());
- auto firYield = mlir::cast<fir::YieldOp>(
- privatizer.getDeallocRegion().back().getTerminator());
- rewriter.setInsertionPoint(firYield);
- rewriter.create<mlir::omp::YieldOp>(firYield.getLoc(),
- firYield.getOperands());
- rewriter.eraseOp(firYield);
- }
-
- rewriter.restoreInsertionPoint(oldIP);
+ cloneFIRRegionToOMP(localizer.getInitRegion(),
+ privatizer.getInitRegion());
+ cloneFIRRegionToOMP(localizer.getDeallocRegion(),
+ privatizer.getDeallocRegion());
wsloopClauseOps.privateVars.push_back(op);
wsloopClauseOps.privateSyms.push_back(
mlir::SymbolRefAttr::get(privatizer));
}
+ if (!loop.getReduceVars().empty()) {
+ for (auto [op, byRef, sym, arg] : llvm::zip_equal(
+ loop.getReduceVars(), loop.getReduceByrefAttr().asArrayRef(),
+ loop.getReduceSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
+ loop.getRegionReduceArgs())) {
+ auto firReducer =
+ mlir::SymbolTable::lookupNearestSymbolFrom<fir::DeclareReductionOp>(
+ loop, sym);
+
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointAfter(firReducer);
+
+ auto ompReducer = rewriter.create<mlir::omp::DeclareReductionOp>(
+ firReducer.getLoc(), sym.getLeafReference().str() + ".omp",
+ firReducer.getTypeAttr().getValue());
+
+ cloneFIRRegionToOMP(firReducer.getAllocRegion(),
+ ompReducer.getAllocRegion());
+ cloneFIRRegionToOMP(firReducer.getInitializerRegion(),
+ ompReducer.getInitializerRegion());
+ cloneFIRRegionToOMP(firReducer.getReductionRegion(),
+ ompReducer.getReductionRegion());
+ cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(),
+ ompReducer.getAtomicReductionRegion());
+ cloneFIRRegionToOMP(firReducer.getCleanupRegion(),
+ ompReducer.getCleanupRegion());
+
+ wsloopClauseOps.reductionVars.push_back(op);
+ wsloopClauseOps.reductionByref.push_back(byRef);
+ wsloopClauseOps.reductionSyms.push_back(
+ mlir::SymbolRefAttr::get(ompReducer));
+ }
+ }
+
auto wsloopOp =
rewriter.create<mlir::omp::WsloopOp>(loop.getLoc(), wsloopClauseOps);
wsloopOp.setComposite(isComposite);
Fortran::common::openmp::EntryBlockArgs wsloopArgs;
wsloopArgs.priv.vars = wsloopClauseOps.privateVars;
+ wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
Fortran::common::openmp::genEntryBlock(rewriter, wsloopArgs,
wsloopOp.getRegion());
@@ -393,7 +421,8 @@ class DoConcurrentConversion
clauseOps.loopLowerBounds.size())))
rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);
- for (unsigned i = 0; i < loop.getLocalVars().size(); ++i)
+ for (unsigned i = 0;
+ i < loop.getLocalVars().size() + loop.getReduceVars().size(); ++i)
loopNestOp.getRegion().eraseArgument(clauseOps.loopLowerBounds.size());
return loopNestOp;
diff --git a/flang/test/Transforms/DoConcurrent/reduce_add.mlir b/flang/test/Transforms/DoConcurrent/reduce_add.mlir
new file mode 100644
index 0000000000000..1ea3e3e527335
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/reduce_add.mlir
@@ -0,0 +1,73 @@
+// Tests mapping reductions from fir to OpenMP.
+
+// RUN: fir-opt --omp-do-concurrent-conversion="map-to=host" %s | FileCheck %s
+
+fir.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+ %c0_i32 = arith.constant 0 : i32
+ fir.yield(%c0_i32 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %0 = arith.addi %arg0, %arg1 : i32
+ fir.yield(%0 : i32)
+}
+
+func.func @_QPdo_concurrent_reduce() {
+ %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1 = arith.constant 1 : index
+ fir.do_concurrent {
+ %7 = fir.alloca i32 {bindc_name = "i"}
+ %8:2 = hlfir.declare %7 {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) reduce(@add_reduction_i32 #fir.reduce_attr<add> %4#0 -> %arg1 : !fir.ref<i32>) {
+ %9 = fir.convert %arg0 : (index) -> i32
+ fir.store %9 to %8#0 : !fir.ref<i32>
+ %10:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %11 = fir.load %10#0 : !fir.ref<i32>
+ %c1_i32_0 = arith.constant 1 : i32
+ %12 = arith.addi %11, %c1_i32_0 : i32
+ hlfir.assign %12 to %10#0 : i32, !fir.ref<i32>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: omp.declare_reduction @add_reduction_i32.omp : i32 init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32):
+// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK: omp.yield(%[[VAL_1]] : i32)
+
+// CHECK-LABEL: } combiner {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32):
+// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32
+// CHECK: omp.yield(%[[VAL_2]] : i32)
+// CHECK: }
+
+// CHECK-LABEL: func.func @_QPdo_concurrent_reduce() {
+// CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"}
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: omp.parallel {
+// CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+// CHECK: omp.wsloop reduction(@add_reduction_i32.omp %[[VAL_3]]#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) {
+// CHECK: omp.loop_nest (%[[VAL_8:.*]]) : index = (%[[VAL_4]]) to (%[[VAL_4]]) inclusive step (%[[VAL_4]]) {
+// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
+// CHECK: fir.store %[[VAL_9]] to %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
+// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_10]]#0 : i32, !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+
+// CHECK: return
+// CHECK: }
+
diff --git a/flang/test/Transforms/DoConcurrent/reduce_local.mlir b/flang/test/Transforms/DoConcurrent/reduce_local.mlir
new file mode 100644
index 0000000000000..0f667109e6e83
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/reduce_local.mlir
@@ -0,0 +1,83 @@
+// Tests mapping reductions and local from fir to OpenMP.
+
+// RUN: fir-opt --omp-do-concurrent-conversion="map-to=host" %s | FileCheck %s
+
+fir.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+ %c0_i32 = arith.constant 0 : i32
+ fir.yield(%c0_i32 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %0 = arith.addi %arg0, %arg1 : i32
+ fir.yield(%0 : i32)
+}
+ fir.local {type = local} @_QFdo_concurrent_reduceEl_private_i32 : i32
+ func.func @_QPdo_concurrent_reduce() {
+ %3 = fir.alloca i32 {bindc_name = "l", uniq_name = "_QFdo_concurrent_reduceEl"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5 = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"}
+ %6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1 = arith.constant 1 : index
+ fir.do_concurrent {
+ %9 = fir.alloca i32 {bindc_name = "i"}
+ %10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) local(@_QFdo_concurrent_reduceEl_private_i32 %4#0 -> %arg1 : !fir.ref<i32>) reduce(@add_reduction_i32 #fir.reduce_attr<add> %6#0 -> %arg2 : !fir.ref<i32>) {
+ %11 = fir.convert %arg0 : (index) -> i32
+ fir.store %11 to %10#0 : !fir.ref<i32>
+ %12:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %13:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1_i32_0 = arith.constant 1 : i32
+ hlfir.assign %c1_i32_0 to %12#0 : i32, !fir.ref<i32>
+ %14 = fir.load %13#0 : !fir.ref<i32>
+ %15 = fir.load %12#0 : !fir.ref<i32>
+ %16 = arith.addi %14, %15 : i32
+ hlfir.assign %16 to %13#0 : i32, !fir.ref<i32>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: omp.declare_reduction @add_reduction_i32.omp : i32 init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32):
+// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK: omp.yield(%[[VAL_1]] : i32)
+
+// CHECK-LABEL: } combiner {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32):
+// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32
+// CHECK: omp.yield(%[[VAL_2]] : i32)
+// CHECK: }
+
+// CHECK: omp.private {type = private} @_QFdo_concurrent_reduceEl_private_i32.omp : i32
+
+// CHECK-LABEL: func.func @_QPdo_concurrent_reduce() {
+// CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "l", uniq_name = "_QFdo_concurrent_reduceEl"}
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"}
+// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
+// CHECK: omp.parallel {
+// CHECK: %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: omp.wsloop private(@_QFdo_concurrent_reduceEl_private_i32.omp %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref<i32>) reduction(@add_reduction_i32.omp %[[VAL_5]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+// CHECK: omp.loop_nest (%[[VAL_11:.*]]) : index = (%[[VAL_6]]) to (%[[VAL_6]]) inclusive step (%[[VAL_6]]) {
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+// CHECK: fir.store %[[VAL_12]] to %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
+// CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_13]]#0 : i32, !fir.ref<i32>
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_16]], %[[VAL_17]] : i32
+// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
>From 185b43f8c45750b13ad4a0e08723b7b35c549111 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sun, 29 Jun 2025 23:30:22 -0500
Subject: [PATCH 2/2] Add all-regions test.
---
.../DoConcurrent/reduce_all_regions.mlir | 70 +++++++++++++++++++
1 file changed, 70 insertions(+)
create mode 100644 flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir
diff --git a/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir
new file mode 100644
index 0000000000000..3d5b8bf22af75
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir
@@ -0,0 +1,70 @@
+// Tests mapping reductions from fir to OpenMP (all regions).
+
+// RUN: fir-opt --omp-do-concurrent-conversion="map-to=host" %s | FileCheck %s
+
+fir.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+ fir.yield(%arg0 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ fir.yield(%arg0 : i32)
+} atomic {
+^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+ fir.yield(%arg0 : !fir.ref<i32>)
+} cleanup {
+^bb0(%arg0: i32):
+ fir.yield
+}
+
+func.func @_QPdo_concurrent_reduce() {
+ %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1 = arith.constant 1 : index
+ fir.do_concurrent {
+ %7 = fir.alloca i32 {bindc_name = "i"}
+ %8:2 = hlfir.declare %7 {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) reduce(@add_reduction_i32 #fir.reduce_attr<add> %4#0 -> %arg1 : !fir.ref<i32>) {
+ %9 = fir.convert %arg0 : (index) -> i32
+ fir.store %9 to %8#0 : !fir.ref<i32>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: omp.declare_reduction @add_reduction_i32.omp : i32 init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32):
+// CHECK: omp.yield(%[[VAL_0]] : i32)
+
+// CHECK-LABEL: } combiner {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32):
+// CHECK: omp.yield(%[[VAL_0]] : i32)
+
+// CHECK-LABEL: } atomic {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>, %[[VAL_1:.*]]: !fir.ref<i32>):
+// CHECK: omp.yield(%[[VAL_0]] : !fir.ref<i32>)
+
+// CHECK-LABEL: } cleanup {
+// CHECK: ^bb0(%[[VAL_0:.*]]: i32):
+// CHECK: omp.yield
+// CHECK: }
+
+// CHECK-LABEL: func.func @_QPdo_concurrent_reduce() {
+// CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"}
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: omp.parallel {
+// CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: omp.wsloop reduction(@add_reduction_i32.omp %[[VAL_3]]#0 -> %[[VAL_7:.*]] : !fir.ref<i32>) {
+// CHECK: omp.loop_nest (%[[VAL_8:.*]]) : index = (%[[VAL_4]]) to (%[[VAL_4]]) inclusive step (%[[VAL_4]]) {
+// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
+// CHECK: fir.store %[[VAL_9]] to %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+// CHECK: }
More information about the llvm-branch-commits
mailing list