[flang-commits] [flang] [Flang][OpenMP] Move loop privatization out of dispatch (PR #106066)
Sergio Afonso via flang-commits
flang-commits at lists.llvm.org
Thu Aug 29 03:44:16 PDT 2024
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/106066
>From a1872814655989a3f8025b57c49261e1545bb2b2 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Mon, 26 Aug 2024 13:29:52 +0100
Subject: [PATCH] [Flang][OpenMP] Move loop privatization out of dispatch
This patch moves the creation of `DataSharingProcessor` instances for loop
constructs out of `genOMPDispatch()` and into their corresponding codegen
functions. This is a necessary first step to enable a proper handling of
privatization on composite constructs.
Some tests are updated due to a change of order between clause processing and
privatization.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 142 +++++++++++-------
.../test/Lower/OpenMP/parallel-reduction3.f90 | 14 +-
.../wsloop-reduction-array-assumed-shape.f90 | 14 +-
.../Lower/OpenMP/wsloop-reduction-array.f90 | 18 +--
.../Lower/OpenMP/wsloop-reduction-array2.f90 | 18 +--
.../wsloop-reduction-multiple-clauses.f90 | 22 +--
6 files changed, 127 insertions(+), 101 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 0f7eafdc9a4736..3a48b370e6e150 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1044,7 +1044,6 @@ static void genDistributeClauses(lower::AbstractConverter &converter,
cp.processAllocate(clauseOps);
cp.processDistSchedule(stmtCtx, clauseOps);
cp.processOrder(clauseOps);
- // TODO Support delayed privatization.
}
static void genFlushClauses(lower::AbstractConverter &converter,
@@ -1128,7 +1127,6 @@ static void genSimdClauses(lower::AbstractConverter &converter,
cp.processSafelen(clauseOps);
cp.processSimdlen(clauseOps);
- // TODO Support delayed privatization.
cp.processTODO<clause::Linear, clause::Nontemporal>(
loc, llvm::omp::Directive::OMPD_simd);
}
@@ -1299,7 +1297,6 @@ static void genWsloopClauses(
cp.processOrdered(clauseOps);
cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms);
cp.processSchedule(stmtCtx, clauseOps);
- // TODO Support delayed privatization.
cp.processTODO<clause::Allocate, clause::Linear>(
loc, llvm::omp::Directive::OMPD_do);
@@ -1924,17 +1921,25 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
// also be a leaf of a composite construct
//===----------------------------------------------------------------------===//
-static void genStandaloneDistribute(
- lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+static void genStandaloneDistribute(lower::AbstractConverter &converter,
+ lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
lower::StatementContext stmtCtx;
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
distributeClauseOps);
+ // TODO: Support delayed privatization.
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/true,
+ /*useDelayedPrivatization=*/false, &symTable);
+ dsp.processStep1();
+
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -1955,8 +1960,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
- ConstructQueue::const_iterator item,
- DataSharingProcessor &dsp) {
+ ConstructQueue::const_iterator item) {
lower::StatementContext stmtCtx;
mlir::omp::WsloopOperands wsloopClauseOps;
@@ -1965,6 +1969,12 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
wsloopClauseOps, reductionTypes, reductionSyms);
+ // TODO: Support delayed privatization.
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/true,
+ /*useDelayedPrivatization=*/false, &symTable);
+ dsp.processStep1();
+
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -2004,11 +2014,16 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
- ConstructQueue::const_iterator item,
- DataSharingProcessor &dsp) {
+ ConstructQueue::const_iterator item) {
mlir::omp::SimdOperands simdClauseOps;
genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps);
+ // TODO: Support delayed privatization.
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/true,
+ /*useDelayedPrivatization=*/false, &symTable);
+ dsp.processStep1();
+
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -2024,11 +2039,13 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
llvm::omp::Directive::OMPD_simd, dsp);
}
-static void genStandaloneTaskloop(
- lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+static void genStandaloneTaskloop(lower::AbstractConverter &converter,
+ lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
TODO(loc, "Taskloop construct");
}
@@ -2040,7 +2057,7 @@ static void genCompositeDistributeParallelDo(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+ ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
TODO(loc, "Composite DISTRIBUTE PARALLEL DO");
}
@@ -2049,16 +2066,18 @@ static void genCompositeDistributeParallelDoSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+ ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
}
-static void genCompositeDistributeSimd(
- lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+static void genCompositeDistributeSimd(lower::AbstractConverter &converter,
+ lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
lower::StatementContext stmtCtx;
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
@@ -2073,6 +2092,12 @@ static void genCompositeDistributeSimd(
mlir::omp::SimdOperands simdClauseOps;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
+ // TODO: Support delayed privatization.
+ DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/true,
+ /*useDelayedPrivatization=*/false, &symTable);
+ dsp.processStep1();
+
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
mlir::omp::LoopNestOperands loopNestClauseOps;
@@ -2109,8 +2134,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
- ConstructQueue::const_iterator item,
- DataSharingProcessor &dsp) {
+ ConstructQueue::const_iterator item) {
lower::StatementContext stmtCtx;
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
@@ -2127,6 +2151,12 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
mlir::omp::SimdOperands simdClauseOps;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
+ // TODO: Support delayed privatization.
+ DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/true,
+ /*useDelayedPrivatization=*/false, &symTable);
+ dsp.processStep1();
+
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
mlir::omp::LoopNestOperands loopNestClauseOps;
@@ -2157,11 +2187,13 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
llvm::omp::Directive::OMPD_do_simd, dsp);
}
-static void genCompositeTaskloopSimd(
- lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+static void genCompositeTaskloopSimd(lower::AbstractConverter &converter,
+ lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
TODO(loc, "Composite TASKLOOP SIMD");
}
@@ -2170,30 +2202,35 @@ static void genCompositeTaskloopSimd(
// Dispatch
//===----------------------------------------------------------------------===//
-static bool genOMPCompositeDispatch(
- lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ConstructQueue &queue,
- ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
+static bool genOMPCompositeDispatch(lower::AbstractConverter &converter,
+ lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
using llvm::omp::Directive;
using lower::omp::matchLeafSequence;
+ // TODO: Privatization for composite constructs is currently only done based
+ // on the clauses for their last leaf construct, which may not always be
+ // correct. Consider per-leaf privatization of composite constructs once
+ // delayed privatization is supported by all participating ops.
if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
- queue, item, dsp);
+ queue, item);
else if (matchLeafSequence(item, queue,
Directive::OMPD_distribute_parallel_do_simd))
genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
- loc, queue, item, dsp);
+ loc, queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue,
- item, dsp);
+ item);
else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
- genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item,
- dsp);
+ genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue,
- item, dsp);
+ item);
else
return false;
@@ -2208,20 +2245,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
ConstructQueue::const_iterator item) {
assert(item != queue.end());
- std::optional<DataSharingProcessor> loopDsp;
bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
llvm::omp::Association::Loop;
if (loopLeaf) {
symTable.pushScope();
- // TODO: Use one DataSharingProcessor for each leaf of a composite
- // construct.
- loopDsp.emplace(converter, semaCtx, item->clauses, eval,
- /*shouldCollectPreDeterminedSymbols=*/true,
- /*useDelayedPrivatization=*/false, &symTable);
- loopDsp->processStep1();
-
if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue,
- item, *loopDsp)) {
+ item)) {
symTable.popScope();
return;
}
@@ -2233,11 +2262,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
break;
case llvm::omp::Directive::OMPD_distribute:
genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue,
- item, *loopDsp);
+ item);
break;
case llvm::omp::Directive::OMPD_do:
- genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item,
- *loopDsp);
+ genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_loop:
TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir));
@@ -2266,8 +2294,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
// in genBodyOfOp
break;
case llvm::omp::Directive::OMPD_simd:
- genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item,
- *loopDsp);
+ genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_single:
genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
@@ -2297,8 +2324,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_taskloop:
- genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item,
- *loopDsp);
+ genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_taskwait:
genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);
diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90
index 441dff34553d4f..591f41cb946602 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction3.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90
@@ -69,19 +69,19 @@
! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32
! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box<!fir.array<?xi32>>
! CHECK: omp.parallel {
-! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
-! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
-! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) {
! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
-! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK: fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
-! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
index c984ab61bedb3b..d881ff8c1a026a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
@@ -79,18 +79,18 @@ subroutine reduce(r)
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
! CHECK: omp.parallel {
-! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
-! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
-! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
+! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_4]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_6]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) {
+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_4]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> (!fir.ref<!fir.box<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.array<?xf64>>>)
-! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref<i32>
-! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f64
! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<!fir.box<!fir.array<?xf64>>>
! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
index 43e4c86b6bade2..afdd486c7be2a4 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
@@ -73,24 +73,24 @@ program reduce
! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
! CHECK: omp.parallel {
-! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
-! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
-! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
-! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
-! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
-! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
-! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_21:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] : i32
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
index be5273ea36c99f..ee77332e9b412a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
@@ -73,23 +73,23 @@ program reduce
! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
! CHECK: omp.parallel {
-! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
-! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
-! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
-! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
+! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
-! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_17]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
-! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_20]] : i32
! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_23:.*]] = arith.constant 1 : index
@@ -99,7 +99,7 @@ program reduce
! CHECK: %[[VAL_26:.*]] = arith.constant 2 : index
! CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_25]] (%[[VAL_26]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
-! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_28]], %[[VAL_29]] : i32
! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_32:.*]] = arith.constant 2 : index
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
index db4b4d33da7579..3e93e915bcd89c 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
@@ -109,23 +109,23 @@ program main
! CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f64
! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_4]]#0 : f64, !fir.ref<!fir.array<3x3xf64>>
! CHECK: omp.parallel {
-! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
-! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>>
-! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>>
-! CHECK: fir.store %[[VAL_13]] to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>
+! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>>
+! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>>
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>
+! CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_16:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref<f64>, byref @add_reduction_byref_box_3x3xf64 %[[VAL_14]] -> %[[VAL_19:.*]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>) {
+! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref<f64>, byref @add_reduction_byref_box_3x3xf64 %[[VAL_12]] -> %[[VAL_19:.*]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>) {
! CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) {
! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEscalar"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFEarray"} : (!fir.ref<!fir.box<!fir.array<3x3xf64>>>) -> (!fir.ref<!fir.box<!fir.array<3x3xf64>>>, !fir.ref<!fir.box<!fir.array<3x3xf64>>>)
-! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: fir.store %[[VAL_20]] to %[[VAL_14]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i32) -> f64
! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_21]]#0 : f64, !fir.ref<f64>
-! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_26:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_25]], %[[VAL_26]] : i32
! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> f64
@@ -134,7 +134,7 @@ program main
! CHECK: %[[VAL_31:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_30]], %[[VAL_31]]) : (!fir.box<!fir.array<3x3xf64>>, index, index) -> !fir.ref<f64>
! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_32]] : f64, !fir.ref<f64>
-! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_34:.*]] = arith.constant 2 : i32
! CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_33]], %[[VAL_34]] : i32
! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (i32) -> f64
@@ -143,7 +143,7 @@ program main
! CHECK: %[[VAL_39:.*]] = arith.constant 2 : index
! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_37]] (%[[VAL_38]], %[[VAL_39]]) : (!fir.box<!fir.array<3x3xf64>>, index, index) -> !fir.ref<f64>
! CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_40]] : f64, !fir.ref<f64>
-! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_42:.*]] = arith.constant 3 : i32
! CHECK: %[[VAL_43:.*]] = arith.addi %[[VAL_41]], %[[VAL_42]] : i32
! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> f64
More information about the flang-commits
mailing list