[llvm-branch-commits] [flang] [mlir] [MLIR][Flang][OpenMP] Make omp.wsloop into a loop wrapper (PR #88403)

Sergio Afonso via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Apr 11 08:55:56 PDT 2024


https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/88403

This patch updates the definition of `omp.wsloop` to enforce the restrictions of a wrapper operation. Given the widespread use of this operation, the changes introduced in this patch are several:

- Update the MLIR definition of the `omp.wsloop`, as well as parser/printer, builder and verifier.
- Update verifiers for `omp.ordered.region`, `omp.cancel` and `omp.cancellation_point` to correctly check for a parent `omp.wsloop`.
- Update MLIR to LLVM IR translation of `omp.wsloop` to keep working after the change in representation. Another patch should be created to reduce the current code duplication between `omp.wsloop` and `omp.simd` after introducing a common `omp.loop_nest` operation.
- Update the `scf.parallel` lowering pass to OpenMP to produce the new expected representation.
- Update flang lowering to implement `omp.wsloop` representation changes, including changes to `lastprivate`, and `reduction` handling to avoid adding operations into a wrapper and attach entry block arguments to the right operation.
- Fix unit tests broken due to the representation change.

>From 50e4f785c005533656a032b121dbc93b1b87709f Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Thu, 11 Apr 2024 16:36:15 +0100
Subject: [PATCH] [MLIR][Flang][OpenMP] Make omp.wsloop into a loop wrapper

This patch updates the definition of `omp.wsloop` to enforce the restrictions
of a wrapper operation. Given the widespread use of this operation, the changes
introduced in this patch are several:

- Update the MLIR definition of the `omp.wsloop`, as well as parser/printer,
builder and verifier.
- Update verifiers for `omp.ordered.region`, `omp.cancel` and
`omp.cancellation_point` to correctly check for a parent `omp.wsloop`.
- Update MLIR to LLVM IR translation of `omp.wsloop` to keep working after the
change in representation. Another patch should be created to reduce the current
code duplication between `omp.wsloop` and `omp.simd` after introducing a common
`omp.loop_nest` operation.
- Update the `scf.parallel` lowering pass to OpenMP to produce the new expected
representation.
- Update flang lowering to implement `omp.wsloop` representation changes,
including changes to `lastprivate`, and `reduction` handling to avoid adding
operations into a wrapper and attach entry block arguments to the right
operation.
- Fix unit tests broken due to the representation change.
---
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  48 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 117 +--
 .../Fir/convert-to-llvm-openmp-and-fir.fir    | 108 +--
 flang/test/Lower/OpenMP/FIR/copyin.f90        |  16 +-
 .../OpenMP/FIR/lastprivate-commonblock.f90    |   5 +-
 flang/test/Lower/OpenMP/FIR/location.f90      |  17 +-
 .../parallel-lastprivate-clause-scalar.f90    |  48 +-
 .../FIR/parallel-private-clause-fixes.f90     |  49 +-
 .../OpenMP/FIR/parallel-private-clause.f90    | 114 +--
 .../OpenMP/FIR/parallel-wsloop-firstpriv.f90  |  12 +-
 .../test/Lower/OpenMP/FIR/parallel-wsloop.f90 | 128 +--
 .../Lower/OpenMP/FIR/stop-stmt-in-region.f90  |  39 +-
 flang/test/Lower/OpenMP/FIR/target.f90        |   5 +-
 flang/test/Lower/OpenMP/FIR/unstructured.f90  | 199 ++---
 flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 |  47 +-
 .../test/Lower/OpenMP/FIR/wsloop-collapse.f90 |  29 +-
 .../Lower/OpenMP/FIR/wsloop-monotonic.f90     |  30 +-
 .../Lower/OpenMP/FIR/wsloop-nonmonotonic.f90  |  31 +-
 .../test/Lower/OpenMP/FIR/wsloop-ordered.f90  |  18 +-
 .../OpenMP/FIR/wsloop-reduction-add-byref.f90 | 191 +++--
 .../Lower/OpenMP/FIR/wsloop-reduction-add.f90 | 191 +++--
 .../FIR/wsloop-reduction-iand-byref.f90       |   4 +-
 .../OpenMP/FIR/wsloop-reduction-iand.f90      |   4 +-
 .../FIR/wsloop-reduction-ieor-byref.f90       |   4 +-
 .../OpenMP/FIR/wsloop-reduction-ieor.f90      |   4 +-
 .../OpenMP/FIR/wsloop-reduction-ior-byref.f90 |   4 +-
 .../Lower/OpenMP/FIR/wsloop-reduction-ior.f90 |   4 +-
 .../wsloop-reduction-logical-eqv-byref.f90    | 144 ++--
 .../FIR/wsloop-reduction-logical-eqv.f90      | 144 ++--
 .../wsloop-reduction-logical-neqv-byref.f90   | 144 ++--
 .../FIR/wsloop-reduction-logical-neqv.f90     | 144 ++--
 .../OpenMP/FIR/wsloop-reduction-max-byref.f90 |  31 +-
 .../Lower/OpenMP/FIR/wsloop-reduction-max.f90 |  31 +-
 .../OpenMP/FIR/wsloop-reduction-min-byref.f90 |  32 +-
 .../Lower/OpenMP/FIR/wsloop-reduction-min.f90 |  32 +-
 flang/test/Lower/OpenMP/FIR/wsloop-simd.f90   |  29 +-
 .../test/Lower/OpenMP/FIR/wsloop-variable.f90 | 172 ++--
 flang/test/Lower/OpenMP/FIR/wsloop.f90        |  66 +-
 .../Todo/omp-default-clause-inner-loop.f90    |   5 +-
 flang/test/Lower/OpenMP/copyin.f90            |  28 +-
 .../Lower/OpenMP/default-clause-byref.f90     |   5 +-
 flang/test/Lower/OpenMP/default-clause.f90    |   5 +-
 flang/test/Lower/OpenMP/hlfir-wsloop.f90      |  12 +-
 .../Lower/OpenMP/lastprivate-commonblock.f90  |  59 +-
 flang/test/Lower/OpenMP/lastprivate-iv.f90    |  90 ++-
 flang/test/Lower/OpenMP/location.f90          |  17 +-
 .../parallel-lastprivate-clause-scalar.f90    |  48 +-
 .../OpenMP/parallel-private-clause-fixes.f90  |  49 +-
 .../Lower/OpenMP/parallel-private-clause.f90  | 106 +--
 .../OpenMP/parallel-wsloop-firstpriv.f90      |  10 +-
 flang/test/Lower/OpenMP/parallel-wsloop.f90   | 138 ++--
 .../test/Lower/OpenMP/stop-stmt-in-region.f90 |  39 +-
 flang/test/Lower/OpenMP/target.f90            |   5 +-
 flang/test/Lower/OpenMP/unstructured.f90      | 199 ++---
 flang/test/Lower/OpenMP/wsloop-chunks.f90     |  47 +-
 flang/test/Lower/OpenMP/wsloop-collapse.f90   |  29 +-
 flang/test/Lower/OpenMP/wsloop-monotonic.f90  |  18 +-
 .../test/Lower/OpenMP/wsloop-nonmonotonic.f90 |  19 +-
 flang/test/Lower/OpenMP/wsloop-ordered.f90    |  18 +-
 .../OpenMP/wsloop-reduction-add-byref.f90     | 219 +++---
 .../wsloop-reduction-add-hlfir-byref.f90      |  18 +-
 .../OpenMP/wsloop-reduction-add-hlfir.f90     |  18 +-
 .../Lower/OpenMP/wsloop-reduction-add.f90     | 219 +++---
 .../Lower/OpenMP/wsloop-reduction-array.f90   |  35 +-
 .../Lower/OpenMP/wsloop-reduction-array2.f90  |  51 +-
 .../OpenMP/wsloop-reduction-iand-byref.f90    |  24 +-
 .../Lower/OpenMP/wsloop-reduction-iand.f90    |  24 +-
 .../OpenMP/wsloop-reduction-ieor-byref.f90    |   6 +-
 .../Lower/OpenMP/wsloop-reduction-ieor.f90    |   6 +-
 .../OpenMP/wsloop-reduction-ior-byref.f90     |  24 +-
 .../Lower/OpenMP/wsloop-reduction-ior.f90     |  24 +-
 .../wsloop-reduction-logical-and-byref.f90    | 134 ++--
 .../OpenMP/wsloop-reduction-logical-and.f90   | 134 ++--
 .../wsloop-reduction-logical-eqv-byref.f90    | 134 ++--
 .../OpenMP/wsloop-reduction-logical-eqv.f90   | 134 ++--
 .../wsloop-reduction-logical-neqv-byref.f90   | 134 ++--
 .../OpenMP/wsloop-reduction-logical-neqv.f90  | 134 ++--
 .../wsloop-reduction-logical-or-byref.f90     | 134 ++--
 .../OpenMP/wsloop-reduction-logical-or.f90    | 134 ++--
 .../OpenMP/wsloop-reduction-max-byref.f90     |  90 ++-
 .../wsloop-reduction-max-hlfir-byref.f90      |  26 +-
 .../OpenMP/wsloop-reduction-max-hlfir.f90     |  26 +-
 .../Lower/OpenMP/wsloop-reduction-max.f90     |  90 ++-
 .../OpenMP/wsloop-reduction-min-byref.f90     |  92 ++-
 .../Lower/OpenMP/wsloop-reduction-min.f90     |  92 ++-
 .../Lower/OpenMP/wsloop-reduction-min2.f90    |  16 +-
 .../OpenMP/wsloop-reduction-mul-byref.f90     | 212 ++---
 .../Lower/OpenMP/wsloop-reduction-mul.f90     | 212 ++---
 .../Lower/OpenMP/wsloop-reduction-multi.f90   |  49 +-
 flang/test/Lower/OpenMP/wsloop-simd.f90       |  29 +-
 .../test/Lower/OpenMP/wsloop-unstructured.f90 |  39 +-
 flang/test/Lower/OpenMP/wsloop-variable.f90   | 167 ++--
 flang/test/Lower/OpenMP/wsloop.f90            |  72 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  56 +-
 .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp    |  52 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 132 ++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  66 +-
 mlir/test/CAPI/execution_engine.c             |   7 +-
 .../OpenMPToLLVM/convert-to-llvmir.mlir       |  64 +-
 .../Conversion/SCFToOpenMP/reductions.mlir    |   4 +
 .../Conversion/SCFToOpenMP/scf-to-openmp.mlir |  31 +-
 .../Dialect/LLVMIR/legalize-for-export.mlir   |  19 +-
 mlir/test/Dialect/OpenMP/invalid.mlir         | 257 +++---
 mlir/test/Dialect/OpenMP/ops.mlir             | 562 +++++++------
 .../LLVMIR/omptarget-parallel-wsloop.mlir     |  11 +-
 .../LLVMIR/omptarget-wsloop-collapsed.mlir    |  17 +-
 mlir/test/Target/LLVMIR/omptarget-wsloop.mlir |  18 +-
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 741 ++++++++++--------
 mlir/test/Target/LLVMIR/openmp-nested.mlir    |  30 +-
 mlir/test/Target/LLVMIR/openmp-reduction.mlir | 113 +--
 110 files changed, 4678 insertions(+), 3835 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index e114ab9f4548ab..645c351ac6c08c 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -133,8 +133,14 @@ void DataSharingProcessor::insertBarrier() {
 }
 
 void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
+  mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
+  mlir::omp::LoopNestOp loopOp;
+  if (auto wrapper = mlir::dyn_cast<mlir::omp::LoopWrapperInterface>(op))
+    loopOp = wrapper.isWrapper()
+                 ? mlir::cast<mlir::omp::LoopNestOp>(wrapper.getWrappedLoop())
+                 : nullptr;
+
   bool cmpCreated = false;
-  mlir::OpBuilder::InsertPoint localInsPt = firOpBuilder.saveInsertionPoint();
   for (const omp::Clause &clause : clauses) {
     if (clause.id != llvm::omp::OMPC_lastprivate)
       continue;
@@ -213,18 +219,20 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
       // Update the original variable just before exiting the worksharing
       // loop. Conversion as follows:
       //
-      //                       omp.wsloop {
-      // omp.wsloop {            ...
-      //    ...                  store
-      //    store       ===>     %v = arith.addi %iv, %step
-      //    omp.yield            %cmp = %step < 0 ? %v < %ub : %v > %ub
-      // }                       fir.if %cmp {
-      //                           fir.store %v to %loopIV
-      //                           ^%lpv_update_blk:
-      //                         }
-      //                         omp.yield
-      //                       }
-      //
+      // omp.wsloop {             omp.wsloop {
+      //   omp.loop_nest {          omp.loop_nest {
+      //     ...                      ...
+      //     store          ===>      store
+      //     omp.yield                %v = arith.addi %iv, %step
+      //   }                          %cmp = %step < 0 ? %v < %ub : %v > %ub
+      //   omp.terminator             fir.if %cmp {
+      // }                              fir.store %v to %loopIV
+      //                                ^%lpv_update_blk:
+      //                              }
+      //                              omp.yield
+      //                            }
+      //                            omp.terminator
+      //                          }
 
       // Only generate the compare once in presence of multiple LastPrivate
       // clauses.
@@ -232,14 +240,13 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
         continue;
       cmpCreated = true;
 
-      mlir::Location loc = op->getLoc();
-      mlir::Operation *lastOper = op->getRegion(0).back().getTerminator();
+      mlir::Location loc = loopOp.getLoc();
+      mlir::Operation *lastOper = loopOp.getRegion().back().getTerminator();
       firOpBuilder.setInsertionPoint(lastOper);
 
-      mlir::Value iv = op->getRegion(0).front().getArguments()[0];
-      mlir::Value ub =
-          mlir::dyn_cast<mlir::omp::WsloopOp>(op).getUpperBound()[0];
-      mlir::Value step = mlir::dyn_cast<mlir::omp::WsloopOp>(op).getStep()[0];
+      mlir::Value iv = loopOp.getIVs()[0];
+      mlir::Value ub = loopOp.getUpperBound()[0];
+      mlir::Value step = loopOp.getStep()[0];
 
       // v = iv + step
       // cmp = step < 0 ? v < ub : v > ub
@@ -258,7 +265,7 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
       auto ifOp = firOpBuilder.create<fir::IfOp>(loc, cmpOp, /*else*/ false);
       firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
       assert(loopIV && "loopIV was not set");
-      firOpBuilder.create<fir::StoreOp>(op->getLoc(), v, loopIV);
+      firOpBuilder.create<fir::StoreOp>(loopOp.getLoc(), v, loopIV);
       lastPrivIP = firOpBuilder.saveInsertionPoint();
     } else {
       TODO(converter.getCurrentLocation(),
@@ -266,7 +273,6 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
            "simd/worksharing-loop");
     }
   }
-  firOpBuilder.restoreInsertionPoint(localInsPt);
 }
 
 void DataSharingProcessor::collectSymbols(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 1800fcb19dcd2e..b21351382b6bdf 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1626,7 +1626,9 @@ genOmpFlush(Fortran::lower::AbstractConverter &converter,
 static llvm::SmallVector<const Fortran::semantics::Symbol *>
 genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
             mlir::Location &loc,
-            llvm::ArrayRef<const Fortran::semantics::Symbol *> args) {
+            llvm::ArrayRef<const Fortran::semantics::Symbol *> args,
+            llvm::ArrayRef<const Fortran::semantics::Symbol *> wrapperSyms = {},
+            llvm::ArrayRef<mlir::BlockArgument> wrapperArgs = {}) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   auto &region = op->getRegion(0);
 
@@ -1637,6 +1639,14 @@ genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
   llvm::SmallVector<mlir::Location> locs(args.size(), loc);
   firOpBuilder.createBlock(&region, {}, tiv, locs);
+
+  // Bind the entry block arguments of parent wrappers to the corresponding
+  // symbols. Do it here so that any hlfir.declare operations created as a
+  // result are inserted inside of the omp.loop_nest rather than the wrapper
+  // operations.
+  for (auto [arg, prv] : llvm::zip_equal(wrapperSyms, wrapperArgs))
+    converter.bindSymbol(*arg, prv);
+
   // The argument is not currently in memory, so make a temporary for the
   // argument, and store it there, then bind that location to the argument.
   mlir::Operation *storeOp = nullptr;
@@ -1650,58 +1660,6 @@ genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
   return llvm::SmallVector<const Fortran::semantics::Symbol *>(args);
 }
 
-static llvm::SmallVector<const Fortran::semantics::Symbol *>
-genLoopAndReductionVars(
-    mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
-    mlir::Location &loc,
-    llvm::ArrayRef<const Fortran::semantics::Symbol *> loopArgs,
-    llvm::ArrayRef<const Fortran::semantics::Symbol *> reductionArgs,
-    llvm::ArrayRef<mlir::Type> reductionTypes) {
-  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-
-  llvm::SmallVector<mlir::Type> blockArgTypes;
-  llvm::SmallVector<mlir::Location> blockArgLocs;
-  blockArgTypes.reserve(loopArgs.size() + reductionArgs.size());
-  blockArgLocs.reserve(blockArgTypes.size());
-  mlir::Block *entryBlock;
-
-  if (loopArgs.size()) {
-    std::size_t loopVarTypeSize = 0;
-    for (const Fortran::semantics::Symbol *arg : loopArgs)
-      loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
-    mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
-    std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(),
-                loopVarType);
-    std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc);
-  }
-  if (reductionArgs.size()) {
-    llvm::copy(reductionTypes, std::back_inserter(blockArgTypes));
-    std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc);
-  }
-  entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes,
-                                        blockArgLocs);
-  // The argument is not currently in memory, so make a temporary for the
-  // argument, and store it there, then bind that location to the argument.
-  if (loopArgs.size()) {
-    mlir::Operation *storeOp = nullptr;
-    for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) {
-      mlir::Value indexVal =
-          fir::getBase(op->getRegion(0).front().getArgument(argIndex));
-      storeOp =
-          createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
-    }
-    firOpBuilder.setInsertionPointAfter(storeOp);
-  }
-  // Bind the reduction arguments to their block arguments
-  for (auto [arg, prv] : llvm::zip_equal(
-           reductionArgs,
-           llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) {
-    converter.bindSymbol(*arg, prv);
-  }
-
-  return llvm::SmallVector<const Fortran::semantics::Symbol *>(loopArgs);
-}
-
 static void createSimd(Fortran::lower::AbstractConverter &converter,
                        Fortran::semantics::SemanticsContext &semaCtx,
                        Fortran::lower::pft::Evaluation &eval,
@@ -1797,28 +1755,26 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter,
   if (ReductionProcessor::doReductionByRef(reductionVars))
     byrefOperand = firOpBuilder.getUnitAttr();
 
-  auto wsLoopOp = firOpBuilder.create<mlir::omp::WsloopOp>(
-      loc, lowerBound, upperBound, step, linearVars, linearStepVars,
-      reductionVars,
+  auto wsloopOp = firOpBuilder.create<mlir::omp::WsloopOp>(
+      loc, linearVars, linearStepVars, reductionVars,
       reductionDeclSymbols.empty()
           ? nullptr
           : mlir::ArrayAttr::get(firOpBuilder.getContext(),
                                  reductionDeclSymbols),
       scheduleValClauseOperand, scheduleChunkClauseOperand,
-      /*schedule_modifiers=*/nullptr,
-      /*simd_modifier=*/nullptr, nowaitClauseOperand, byrefOperand,
-      orderedClauseOperand, orderClauseOperand,
-      /*inclusive=*/firOpBuilder.getUnitAttr());
+      /*schedule_modifiers=*/nullptr, /*simd_modifier=*/nullptr,
+      nowaitClauseOperand, byrefOperand, orderedClauseOperand,
+      orderClauseOperand);
 
   // Handle attribute based clauses.
   if (cp.processOrdered(orderedClauseOperand))
-    wsLoopOp.setOrderedValAttr(orderedClauseOperand);
+    wsloopOp.setOrderedValAttr(orderedClauseOperand);
 
   if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand,
                          scheduleSimdClauseOperand)) {
-    wsLoopOp.setScheduleValAttr(scheduleValClauseOperand);
-    wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand);
-    wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand);
+    wsloopOp.setScheduleValAttr(scheduleValClauseOperand);
+    wsloopOp.setScheduleModifierAttr(scheduleModClauseOperand);
+    wsloopOp.setSimdModifierAttr(scheduleSimdClauseOperand);
   }
   // In FORTRAN `nowait` clause occur at the end of `omp do` directive.
   // i.e
@@ -1828,23 +1784,36 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter,
   if (endClauseList) {
     if (ClauseProcessor(converter, semaCtx, *endClauseList)
             .processNowait(nowaitClauseOperand))
-      wsLoopOp.setNowaitAttr(nowaitClauseOperand);
+      wsloopOp.setNowaitAttr(nowaitClauseOperand);
   }
 
+  // Create omp.wsloop wrapper and populate entry block arguments with reduction
+  // variables.
+  llvm::SmallVector<mlir::Location> reductionLocs(reductionSymbols.size(), loc);
+  mlir::Block *wsloopEntryBlock = firOpBuilder.createBlock(
+      &wsloopOp.getRegion(), {}, reductionTypes, reductionLocs);
+  firOpBuilder.setInsertionPoint(
+      Fortran::lower::genOpenMPTerminator(firOpBuilder, wsloopOp, loc));
+
+  // Create nested omp.loop_nest and fill body with loop contents.
+  auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(
+      loc, lowerBound, upperBound, step,
+      /*inclusive=*/firOpBuilder.getUnitAttr());
+
   auto *nestedEval = getCollapsedLoopEval(
       eval, Fortran::lower::getCollapseValue(beginClauseList));
 
   auto ivCallback = [&](mlir::Operation *op) {
-    return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols,
-                                   reductionTypes);
+    return genLoopVars(op, converter, loc, iv, reductionSymbols,
+                       wsloopEntryBlock->getArguments());
   };
 
   createBodyOfOp<mlir::omp::WsloopOp>(
-      *wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
-                     .setClauses(&beginClauseList)
-                     .setDataSharingProcessor(&dsp)
-                     .setReductions(&reductionSymbols, &reductionTypes)
-                     .setGenRegionEntryCb(ivCallback));
+      *loopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
+                   .setClauses(&beginClauseList)
+                   .setDataSharingProcessor(&dsp)
+                   .setReductions(&reductionSymbols, &reductionTypes)
+                   .setGenRegionEntryCb(ivCallback));
 }
 
 static void createSimdWsloop(
@@ -2430,8 +2399,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
 mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
                                                      mlir::Operation *op,
                                                      mlir::Location loc) {
-  if (mlir::isa<mlir::omp::WsloopOp, mlir::omp::DeclareReductionOp,
-                mlir::omp::AtomicUpdateOp, mlir::omp::LoopNestOp>(op))
+  if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp,
+                mlir::omp::LoopNestOp>(op))
     return builder.create<mlir::omp::YieldOp>(loc);
   return builder.create<mlir::omp::TerminatorOp>(loc);
 }
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index fa7979e8875afc..c7c609bbb35623 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -7,15 +7,17 @@ func.func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!
   omp.parallel  {
     %1 = fir.alloca i32 {adapt.valuebyref, pinned}
     %2 = fir.load %arg0 : !fir.ref<i32>
-    omp.wsloop nowait
-    for (%arg2) : i32 = (%c1_i32) to (%2) inclusive step (%c1_i32)  {
-      fir.store %arg2 to %1 : !fir.ref<i32>
-      %3 = fir.load %1 : !fir.ref<i32>
-      %4 = fir.convert %3 : (i32) -> i64
-      %5 = arith.subi %4, %c1_i64 : i64
-      %6 = fir.coordinate_of %arg1, %5 : (!fir.ref<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-      fir.store %3 to %6 : !fir.ref<i32>
-      omp.yield
+    omp.wsloop nowait {
+      omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%2) inclusive step (%c1_i32)  {
+        fir.store %arg2 to %1 : !fir.ref<i32>
+        %3 = fir.load %1 : !fir.ref<i32>
+        %4 = fir.convert %3 : (i32) -> i64
+        %5 = arith.subi %4, %c1_i64 : i64
+        %6 = fir.coordinate_of %arg1, %5 : (!fir.ref<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+        fir.store %3 to %6 : !fir.ref<i32>
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -31,7 +33,7 @@ func.func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!
 // CHECK:      %[[I_VAR:.*]] = llvm.alloca %[[ONE_3]] x i32 {pinned} : (i64) -> !llvm.ptr
 // CHECK:      %[[N:.*]] = llvm.load %[[N_REF]] : !llvm.ptr -> i32
 // CHECK: omp.wsloop nowait
-// CHECK-SAME: for (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) inclusive step (%[[ONE_2]]) {
+// CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) inclusive step (%[[ONE_2]]) {
 // CHECK:   llvm.store %[[I]], %[[I_VAR]] : i32, !llvm.ptr
 // CHECK:   %[[I1:.*]] = llvm.load %[[I_VAR]] : !llvm.ptr -> i32
 // CHECK:   %[[I1_EXT:.*]] = llvm.sext %[[I1]] : i32 to i64
@@ -42,6 +44,8 @@ func.func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!
 // CHECK: }
 // CHECK: omp.terminator
 // CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
 // CHECK: llvm.return
 // CHECK: }
 
@@ -79,13 +83,16 @@ func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
   omp.parallel   {
     %c1 = arith.constant 1 : i32
     %c50 = arith.constant 50 : i32
-    omp.wsloop   for  (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
-      %1 = fir.convert %indx : (i32) -> i64
-      %c1_i64 = arith.constant 1 : i64
-      %2 = arith.subi %1, %c1_i64 : i64
-      %3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-      fir.store %indx to %3 : !fir.ref<i32>
-      omp.yield
+    omp.wsloop {
+      omp.loop_nest (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
+        %1 = fir.convert %indx : (i32) -> i64
+        %c1_i64 = arith.constant 1 : i64
+        %2 = arith.subi %1, %c1_i64 : i64
+        %3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+        fir.store %indx to %3 : !fir.ref<i32>
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -98,9 +105,11 @@ func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
 // CHECK:    omp.parallel   {
 // CHECK:      %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
 // CHECK:      %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
-// CHECK:      omp.wsloop   for  (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
-// CHECK:        llvm.store %[[INDX]], %{{.*}} : i32, !llvm.ptr
-// CHECK:        omp.yield
+// CHECK:      omp.wsloop {
+// CHECK-NEXT:   omp.loop_nest (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
+// CHECK:          llvm.store %[[INDX]], %{{.*}} : i32, !llvm.ptr
+// CHECK:          omp.yield
+// CHECK:        omp.terminator
 // CHECK:      omp.terminator
 // CHECK:    llvm.return
 
@@ -708,18 +717,20 @@ func.func @_QPsb() {
 // CHECK-SAME: %[[ARRAY_REF:.*]]: !llvm.ptr
 // CHECK:    %[[RED_ACCUMULATOR:.*]] = llvm.alloca %2 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
 // CHECK:    omp.parallel   {
-// CHECK:      omp.wsloop   reduction(@[[EQV_REDUCTION]] %[[RED_ACCUMULATOR]] -> %[[PRV:.+]] : !llvm.ptr) for
-// CHECK:        %[[ARRAY_ELEM_REF:.*]] = llvm.getelementptr %[[ARRAY_REF]][0, %{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr
-// CHECK:        %[[ARRAY_ELEM:.*]] = llvm.load %[[ARRAY_ELEM_REF]] : !llvm.ptr -> i32
-// CHECK:        %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
-// CHECK:        %[[ZERO_1:.*]] = llvm.mlir.constant(0 : i64) : i32
-// CHECK:        %[[ARGVAL_1:.*]] = llvm.icmp "ne" %[[LPRV]], %[[ZERO_1]] : i32
-// CHECK:        %[[ZERO_2:.*]] = llvm.mlir.constant(0 : i64) : i32
-// CHECK:        %[[ARGVAL_2:.*]] = llvm.icmp "ne" %[[ARRAY_ELEM]], %[[ZERO_2]] : i32
-// CHECK:        %[[RES:.*]] = llvm.icmp "eq" %[[ARGVAL_2]], %[[ARGVAL_1]] : i1
-// CHECK:        %[[RES_EXT:.*]] = llvm.zext %[[RES]] : i1 to i32
-// CHECK:        llvm.store %[[RES_EXT]], %[[PRV]] : i32, !llvm.ptr
-// CHECK:        omp.yield
+// CHECK:      omp.wsloop reduction(@[[EQV_REDUCTION]] %[[RED_ACCUMULATOR]] -> %[[PRV:.+]] : !llvm.ptr) {
+// CHECK-NEXT:   omp.loop_nest
+// CHECK:          %[[ARRAY_ELEM_REF:.*]] = llvm.getelementptr %[[ARRAY_REF]][0, %{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr
+// CHECK:          %[[ARRAY_ELEM:.*]] = llvm.load %[[ARRAY_ELEM_REF]] : !llvm.ptr -> i32
+// CHECK:          %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
+// CHECK:          %[[ZERO_1:.*]] = llvm.mlir.constant(0 : i64) : i32
+// CHECK:          %[[ARGVAL_1:.*]] = llvm.icmp "ne" %[[LPRV]], %[[ZERO_1]] : i32
+// CHECK:          %[[ZERO_2:.*]] = llvm.mlir.constant(0 : i64) : i32
+// CHECK:          %[[ARGVAL_2:.*]] = llvm.icmp "ne" %[[ARRAY_ELEM]], %[[ZERO_2]] : i32
+// CHECK:          %[[RES:.*]] = llvm.icmp "eq" %[[ARGVAL_2]], %[[ARGVAL_1]] : i1
+// CHECK:          %[[RES_EXT:.*]] = llvm.zext %[[RES]] : i1 to i32
+// CHECK:          llvm.store %[[RES_EXT]], %[[PRV]] : i32, !llvm.ptr
+// CHECK:          omp.yield
+// CHECK:        omp.terminator
 // CHECK:      omp.terminator
 // CHECK:    llvm.return
 
@@ -747,21 +758,24 @@ func.func @_QPsimple_reduction(%arg0: !fir.ref<!fir.array<100x!fir.logical<4>>>
     %c1_i32 = arith.constant 1 : i32
     %c100_i32 = arith.constant 100 : i32
     %c1_i32_0 = arith.constant 1 : i32
-    omp.wsloop   reduction(@eqv_reduction %1 -> %prv : !fir.ref<!fir.logical<4>>) for  (%arg1) : i32 = (%c1_i32) to (%c100_i32) inclusive step (%c1_i32_0) {
-      fir.store %arg1 to %3 : !fir.ref<i32>
-      %4 = fir.load %3 : !fir.ref<i32>
-      %5 = fir.convert %4 : (i32) -> i64
-      %c1_i64 = arith.constant 1 : i64
-      %6 = arith.subi %5, %c1_i64 : i64
-      %7 = fir.coordinate_of %arg0, %6 : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-      %8 = fir.load %7 : !fir.ref<!fir.logical<4>>
-      %lprv = fir.load %prv : !fir.ref<!fir.logical<4>>
-      %lprv1 = fir.convert %lprv : (!fir.logical<4>) -> i1
-      %9 = fir.convert %8 : (!fir.logical<4>) -> i1
-      %10 = arith.cmpi eq, %9, %lprv1 : i1
-      %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-      fir.store %11 to %prv : !fir.ref<!fir.logical<4>>
-      omp.yield
+    omp.wsloop reduction(@eqv_reduction %1 -> %prv : !fir.ref<!fir.logical<4>>) {
+      omp.loop_nest (%arg1) : i32 = (%c1_i32) to (%c100_i32) inclusive step (%c1_i32_0) {
+        fir.store %arg1 to %3 : !fir.ref<i32>
+        %4 = fir.load %3 : !fir.ref<i32>
+        %5 = fir.convert %4 : (i32) -> i64
+        %c1_i64 = arith.constant 1 : i64
+        %6 = arith.subi %5, %c1_i64 : i64
+        %7 = fir.coordinate_of %arg0, %6 : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+        %8 = fir.load %7 : !fir.ref<!fir.logical<4>>
+        %lprv = fir.load %prv : !fir.ref<!fir.logical<4>>
+        %lprv1 = fir.convert %lprv : (!fir.logical<4>) -> i1
+        %9 = fir.convert %8 : (!fir.logical<4>) -> i1
+        %10 = arith.cmpi eq, %9, %lprv1 : i1
+        %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+        fir.store %11 to %prv : !fir.ref<!fir.logical<4>>
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90
index 20023a81977aef..e256404d3d55ce 100644
--- a/flang/test/Lower/OpenMP/FIR/copyin.f90
+++ b/flang/test/Lower/OpenMP/FIR/copyin.f90
@@ -145,10 +145,13 @@ subroutine copyin_derived_type()
 ! CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:           omp.wsloop   for  (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:             fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:             fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref<i32>) -> ()
-! CHECK:             omp.yield
+! CHECK:           omp.wsloop {
+! CHECK-NEXT:        omp.loop_nest (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref<i32>) -> ()
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.terminator
 ! CHECK:         }
@@ -286,7 +289,8 @@ subroutine common_1()
 !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32
 !CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref<i32>
 !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop   for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) {
 !CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref<i32>
 !CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref<i32>
 !CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref<i32>
@@ -296,6 +300,8 @@ subroutine common_1()
 !CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 !CHECK: return
 !CHECK: }
 subroutine common_2()
diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
index 389bcba35f77f5..86c4d917fa51ee 100644
--- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
+++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
@@ -17,7 +17,8 @@
 !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32
 !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32
 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop   for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) {
 !CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref<i32>
 !CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32
 !CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32
@@ -34,6 +35,8 @@
 !CHECK: }
 !CHECK: omp.yield
 !CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 !CHECK: return
 !CHECK: }
 subroutine lastprivate_common
diff --git a/flang/test/Lower/OpenMP/FIR/location.f90 b/flang/test/Lower/OpenMP/FIR/location.f90
index 64837783767032..6a7fb3c035846e 100644
--- a/flang/test/Lower/OpenMP/FIR/location.f90
+++ b/flang/test/Lower/OpenMP/FIR/location.f90
@@ -28,11 +28,14 @@ subroutine sub_target()
 
 !CHECK-LABEL: sub_loop
 subroutine sub_loop()
-!CHECK: omp.wsloop {{.*}}  {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest {{.*}} {
   !$omp do
   do i=1,10
     print *, i
 !CHECK:   omp.yield loc(#[[LOOP_LOC:.*]])
+!CHECK: } loc(#[[LOOP_LOC]])
+!CHECK:   omp.terminator loc(#[[LOOP_LOC]])
 !CHECK: } loc(#[[LOOP_LOC]])
   end do
   !$omp end do
@@ -60,9 +63,9 @@ subroutine sub_if(c)
 
 !CHECK: #[[PAR_LOC]] = loc("{{.*}}location.f90":9:9)
 !CHECK: #[[TAR_LOC]] = loc("{{.*}}location.f90":21:9)
-!CHECK: #[[LOOP_LOC]] = loc("{{.*}}location.f90":32:9)
-!CHECK: #[[BAR_LOC]] = loc("{{.*}}location.f90":44:9)
-!CHECK: #[[TW_LOC]] = loc("{{.*}}location.f90":46:9)
-!CHECK: #[[TY_LOC]] = loc("{{.*}}location.f90":48:9)
-!CHECK: #[[IF_LOC]] = loc("{{.*}}location.f90":55:14)
-!CHECK: #[[TASK_LOC]] = loc("{{.*}}location.f90":55:9)
+!CHECK: #[[LOOP_LOC]] = loc("{{.*}}location.f90":33:9)
+!CHECK: #[[BAR_LOC]] = loc("{{.*}}location.f90":47:9)
+!CHECK: #[[TW_LOC]] = loc("{{.*}}location.f90":49:9)
+!CHECK: #[[TY_LOC]] = loc("{{.*}}location.f90":51:9)
+!CHECK: #[[IF_LOC]] = loc("{{.*}}location.f90":58:14)
+!CHECK: #[[TASK_LOC]] = loc("{{.*}}location.f90":58:9)
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90
index 2060e2062c1a34..16832355f5d1bc 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90
@@ -12,8 +12,9 @@
 !CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", 
 
 ! Check that we are accessing the clone inside the loop
-!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
-!CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: %[[UNIT:.*]] = arith.constant 6 : i32
 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX
 !CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] 
 !CHECK-NEXT: %[[CNST:.*]] = arith.constant
@@ -36,9 +37,12 @@
 ! Testing lastprivate val update
 !CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
 !CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
-!CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}})
-!CHECK-DAG: } 
-!CHECK-DAG: omp.yield
+!CHECK: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}})
+!CHECK: }
+!CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine lastprivate_character(arg1)
         character(5) :: arg1
@@ -55,7 +59,8 @@ subroutine lastprivate_character(arg1)
 !CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}) {
 !CHECK-DAG: omp.parallel  {
 !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1"
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -70,8 +75,11 @@ subroutine lastprivate_character(arg1)
 ! Testing lastprivate val update
 !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
 !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref<i32>
-!CHECK-DAG: }
-!CHECK-DAG: omp.yield
+!CHECK: }
+!CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine lastprivate_int(arg1)
         integer :: arg1
@@ -90,7 +98,8 @@ subroutine lastprivate_int(arg1)
 !CHECK: omp.parallel  {
 !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -108,6 +117,9 @@ subroutine lastprivate_int(arg1)
 !CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
 !CHECK: }
 !CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine mult_lastprivate_int(arg1, arg2)
         integer :: arg1, arg2
@@ -127,7 +139,8 @@ subroutine mult_lastprivate_int(arg1, arg2)
 !CHECK: omp.parallel  {
 !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 !Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -145,6 +158,9 @@ subroutine mult_lastprivate_int(arg1, arg2)
 !CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
 !CHECK: }
 !CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine mult_lastprivate_int2(arg1, arg2)
         integer :: arg1, arg2
@@ -169,7 +185,8 @@ subroutine mult_lastprivate_int2(arg1, arg2)
 ! Lastprivate Allocation
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK-NOT: omp.barrier
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -185,6 +202,9 @@ subroutine mult_lastprivate_int2(arg1, arg2)
 !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref<i32>
 !CHECK-NEXT: }
 !CHECK-NEXT: omp.yield
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
 
 subroutine firstpriv_lastpriv_int(arg1, arg2)
         integer :: arg1, arg2
@@ -207,7 +227,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
 !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
 !CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
 !CHECK-NEXT: omp.barrier
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
 !CHECK: %[[C0:.*]] = arith.constant 0 : i32
@@ -222,6 +243,9 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
 !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref<i32>
 !CHECK-NEXT: }
 !CHECK-NEXT: omp.yield
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
 
 subroutine firstpriv_lastpriv_int2(arg1)
         integer :: arg1
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90
index c99bf761333b81..fb0fb9594c350e 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90
@@ -13,30 +13,33 @@
 ! CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:           omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
-! CHECK:             fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref<i32>
-! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
-! CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
-! CHECK:             %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
-! CHECK:             %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
-! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : index
-! CHECK:             %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
-! CHECK:             %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] =
-! CHECK-SAME:            %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]]
-! CHECK-SAME:            iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
-! CHECK:               fir.store %[[IV]] to %[[PRIV_J]] : !fir.ref<i32>
-! CHECK:               %[[LOAD:.*]] = fir.load %[[PRIV_I]] : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32
-! CHECK:               fir.store %[[VAL_16]] to %[[PRIV_X]] : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index
-! CHECK:               %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
-! CHECK:               %[[IVLOAD:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
-! CHECK:               %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
-! CHECK:               fir.result %[[VAL_17]], %[[IVINC]] : index, i32
+! CHECK:           omp.wsloop {
+! CHECK-NEXT:        omp.loop_nest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
+! CHECK:               fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref<i32>
+! CHECK:               %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:               %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
+! CHECK:               %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+! CHECK:               %[[VAL_11:.*]] = arith.constant 1 : index
+! CHECK:               %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
+! CHECK:               %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] =
+! CHECK-SAME:              %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]]
+! CHECK-SAME:              iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
+! CHECK:                 fir.store %[[IV]] to %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:                 %[[LOAD:.*]] = fir.load %[[PRIV_I]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32
+! CHECK:                 fir.store %[[VAL_16]] to %[[PRIV_X]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index
+! CHECK:                 %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+! CHECK:                 %[[IVLOAD:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:                 %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
+! CHECK:                 fir.result %[[VAL_17]], %[[IVINC]] : index, i32
+! CHECK:               }
+! CHECK:               fir.store %[[VAL_12]]#1 to %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:               omp.yield
 ! CHECK:             }
-! CHECK:             fir.store %[[VAL_12]]#1 to %[[PRIV_J]] : !fir.ref<i32>
-! CHECK:             omp.yield
+! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.terminator
 ! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90
index 8b75ecbaae8c73..2e68d25a15edc1 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90
@@ -249,31 +249,33 @@ subroutine simple_loop_1
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL PRIVATE(r)
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 
-  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
 
-  ! FIRDialect:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! FIRDialect:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! FIRDialect:     %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! FIRDialect:      omp.wsloop {
+  ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
+  ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! FIRDialect:     omp.yield
-  ! FIRDialect:     {{%.*}} = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.if {{%.*}} {
-  ! FIRDialect:     [[LD:%.*]] = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
-  ! FIRDialect:     fir.freemem [[AD]] : !fir.heap<f32>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      omp.yield
+  ! FIRDialect:      omp.terminator
+  ! FIRDialect:      {{%.*}} = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.if {{%.*}} {
+  ! FIRDialect:      [[LD:%.*]] = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:      fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END DO
   ! FIRDialect:  omp.terminator
   !$OMP END PARALLEL
@@ -285,31 +287,33 @@ subroutine simple_loop_2
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 
-  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
 
-  ! FIRDialect:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! FIRDialect:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! FIRDialect:     %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! FIRDialect:      omp.wsloop {
+  ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
+  ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! FIRDialect:     omp.yield
-  ! FIRDialect:     {{%.*}} = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.if {{%.*}} {
-  ! FIRDialect:     [[LD:%.*]] = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
-  ! FIRDialect:     fir.freemem [[AD]] : !fir.heap<f32>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      omp.yield
+  ! FIRDialect:      omp.terminator
+  ! FIRDialect:      {{%.*}} = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.if {{%.*}} {
+  ! FIRDialect:      [[LD:%.*]] = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:      fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END DO
   ! FIRDialect:  omp.terminator
   !$OMP END PARALLEL
@@ -320,31 +324,33 @@ subroutine simple_loop_3
   integer :: i
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 
-  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
 
-  ! FIRDialect:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! FIRDialect:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! FIRDialect:     %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! FIRDialect:      omp.wsloop {
+  ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
+  ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! FIRDialect:     omp.yield
-  ! FIRDialect:     {{%.*}} = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.if {{%.*}} {
-  ! FIRDialect:     [[LD:%.*]] = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
-  ! FIRDialect:     fir.freemem [[AD]] : !fir.heap<f32>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      omp.yield
+  ! FIRDialect:      omp.terminator
+  ! FIRDialect:      {{%.*}} = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.if {{%.*}} {
+  ! FIRDialect:      [[LD:%.*]] = fir.load [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:      fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END PARALLEL DO
   ! FIRDialect:  omp.terminator
 end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90
index 6eb39a2f63725f..490f6d0cf7bcab 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90
@@ -17,10 +17,14 @@ subroutine omp_do_firstprivate(a)
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop   for  (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK-NEXT: omp.yield
+  ! CHECK-NEXT: }
+  ! CHECK-NEXT: omp.terminator
+  ! CHECK-NEXT: }
     do i=1, a
       call foo(i, a)
     end do
@@ -48,10 +52,14 @@ subroutine omp_do_firstprivate2(a, n)
   ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop   for  (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK-NEXT: omp.yield
+  ! CHECK-NEXT: }
+  ! CHECK-NEXT: omp.terminator
+  ! CHECK-NEXT: }
     do i= a, n
       call foo(i, a)
     end do
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90
index 8649cf284ffd9d..630d647bc64b60 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90
@@ -6,19 +6,21 @@
 subroutine simple_parallel_do
   integer :: i
   ! CHECK:  omp.parallel
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.yield
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -32,19 +34,21 @@ subroutine parallel_do_with_parallel_clauses(cond, nt)
   ! CHECK:  %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
   ! CHECK:  %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
   ! CHECK:  omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.yield
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -55,19 +59,21 @@ subroutine parallel_do_with_clauses(nt)
   integer :: i
   ! CHECK:  %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
   ! CHECK:  omp.parallel num_threads(%[[NT]] : i32)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop schedule(dynamic) {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.yield
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -83,18 +89,19 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   integer :: nt
   integer :: i
   ! CHECK:  omp.parallel
-  ! CHECK:    %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
-  ! CHECK:    %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
-  ! CHECK:    %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
-  ! CHECK:    fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
-  ! CHECK:    %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:    %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:    %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:    omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
+  ! CHECK:      %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
+  ! CHECK:      %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+  ! CHECK:      fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
   ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
   ! CHECK:      %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
   ! CHECK:      %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
@@ -104,7 +111,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
     print*, i, cond, nt
   end do
   ! CHECK:      omp.yield
-  ! CHECK:    omp.terminator
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -140,10 +148,13 @@ end subroutine parallel_private_do
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV]] : !fir.ref<i32>
-! CHECK:               fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV]] : !fir.ref<i32>
+! CHECK:                 fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -182,10 +193,13 @@ end subroutine omp_parallel_multiple_firstprivate_do
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:               fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
+! CHECK:                 fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -224,10 +238,13 @@ end subroutine parallel_do_private
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:               fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
+! CHECK:                 fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -266,10 +283,13 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:               fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
+! CHECK:                 fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90
index d6c10bdee88d54..32cc6d17c420be 100644
--- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90
+++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90
@@ -77,24 +77,27 @@ subroutine test_stop_in_region3()
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 10 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:         omp.wsloop   for  (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
-! CHECK:           fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref<i32>
-! CHECK:           cf.br ^bb1
-! CHECK:         ^bb1:
-! CHECK:           %[[VAL_7:.*]] = arith.constant 3 : i32
-! CHECK:           fir.store %[[VAL_7]] to %[[VAL_2]] : !fir.ref<i32>
-! CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
-! CHECK:           %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:           %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : i32
-! CHECK:           cf.cond_br %[[VAL_10]], ^bb2, ^bb3
-! CHECK:         ^bb2:
-! CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
-! CHECK:           %[[VAL_12:.*]] = arith.constant false
-! CHECK:           %[[VAL_13:.*]] = arith.constant false
-! CHECK:           %[[VAL_14:.*]] = fir.call @_FortranAStopStatement(%[[VAL_11]], %[[VAL_12]], %[[VAL_13]]) {{.*}} : (i32, i1, i1) -> none
-! CHECK:           omp.yield
-! CHECK:         ^bb3:
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop {
+! CHECK-NEXT:      omp.loop_nest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
+! CHECK:             fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref<i32>
+! CHECK:             cf.br ^bb1
+! CHECK:           ^bb1:
+! CHECK:             %[[VAL_7:.*]] = arith.constant 3 : i32
+! CHECK:             fir.store %[[VAL_7]] to %[[VAL_2]] : !fir.ref<i32>
+! CHECK:             %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : i32
+! CHECK:             cf.cond_br %[[VAL_10]], ^bb2, ^bb3
+! CHECK:           ^bb2:
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = arith.constant false
+! CHECK:             %[[VAL_13:.*]] = arith.constant false
+! CHECK:             %[[VAL_14:.*]] = fir.call @_FortranAStopStatement(%[[VAL_11]], %[[VAL_12]], %[[VAL_13]]) {{.*}} : (i32, i1, i1) -> none
+! CHECK:             omp.yield
+! CHECK:           ^bb3:
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
 ! CHECK:         cf.br ^bb1
 ! CHECK:       ^bb1:
diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90
index 821196b83c3b99..2b912bc74ae56e 100644
--- a/flang/test/Lower/OpenMP/FIR/target.f90
+++ b/flang/test/Lower/OpenMP/FIR/target.f90
@@ -487,7 +487,8 @@ subroutine omp_target_parallel_do
          !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
          !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32
          !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
-         !CHECK: omp.wsloop   for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) {
+         !CHECK: omp.wsloop {
+         !CHECK: omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) {
          !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref<i32>
          !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
          !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
@@ -501,6 +502,8 @@ subroutine omp_target_parallel_do
          end do
          !CHECK: omp.yield
          !CHECK: }
+         !CHECK: omp.terminator
+         !CHECK: }
       !CHECK: omp.terminator
       !CHECK: }
    !CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90
index bfaf38b7ef1afc..6d1c9aab146401 100644
--- a/flang/test/Lower/OpenMP/FIR/unstructured.f90
+++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90
@@ -67,27 +67,33 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct
 ! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb3
 ! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb4
 ! CHECK:   ^bb2:  // pred: ^bb1
-! CHECK:     omp.wsloop for (%[[ARG1:.*]]) : {{.*}} {
-! CHECK:       fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref<i32>
-! CHECK:     @_FortranAioBeginExternalListOutput
-! CHECK:       %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref<i32>
-! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]])
-! CHECK:       omp.yield
+! CHECK:     omp.wsloop {
+! CHECK:       omp.loop_nest (%[[ARG1:.*]]) : {{.*}} {
+! CHECK:         fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref<i32>
+! CHECK:         @_FortranAioBeginExternalListOutput
+! CHECK:         %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref<i32>
+! CHECK:         @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]])
+! CHECK:         omp.yield
+! CHECK:       }
+! CHECK:       omp.terminator
 ! CHECK:     }
-! CHECK:     omp.wsloop for (%[[ARG2:.*]]) : {{.*}} {
-! CHECK:       fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref<i32>
-! CHECK:       br ^bb1
-! CHECK:     ^bb2:  // 2 preds: ^bb1, ^bb5
-! CHECK:       cond_br %{{[0-9]*}}, ^bb3, ^bb6
-! CHECK:     ^bb3:  // pred: ^bb2
-! CHECK:       cond_br %{{[0-9]*}}, ^bb4, ^bb5
-! CHECK:     ^bb4:  // pred: ^bb3
-! CHECK:       @_FortranAioBeginExternalListOutput
-! CHECK:       %[[LOAD_2:.*]] = fir.load %[[ALLOCA_K]] : !fir.ref<i32>
-! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]])
-! CHECK:       br ^bb2
-! CHECK:     ^bb6:  // 2 preds: ^bb2, ^bb4
-! CHECK:       omp.yield
+! CHECK:     omp.wsloop {
+! CHECK:       omp.loop_nest (%[[ARG2:.*]]) : {{.*}} {
+! CHECK:         fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref<i32>
+! CHECK:         br ^bb1
+! CHECK:       ^bb2:  // 2 preds: ^bb1, ^bb5
+! CHECK:         cond_br %{{[0-9]*}}, ^bb3, ^bb6
+! CHECK:       ^bb3:  // pred: ^bb2
+! CHECK:         cond_br %{{[0-9]*}}, ^bb4, ^bb5
+! CHECK:       ^bb4:  // pred: ^bb3
+! CHECK:         @_FortranAioBeginExternalListOutput
+! CHECK:         %[[LOAD_2:.*]] = fir.load %[[ALLOCA_K]] : !fir.ref<i32>
+! CHECK:         @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]])
+! CHECK:         br ^bb2
+! CHECK:       ^bb6:  // 2 preds: ^bb2, ^bb4
+! CHECK:         omp.yield
+! CHECK:       }
+! CHECK:       omp.terminator
 ! CHECK:     }
 ! CHECK:     br ^bb1
 ! CHECK:   ^bb4:  // pred: ^bb1
@@ -117,20 +123,23 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs
 ! CHECK-LABEL: func @_QPss4{{.*}} {
 ! CHECK:       omp.parallel {
 ! CHECK:         %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned}
-! CHECK:         omp.wsloop for (%[[ARG:.*]]) : {{.*}} {
-! CHECK:           fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref<i32>
-! CHECK:           %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
-! CHECK:           %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
-! CHECK:          fir.if %[[COND_XOR]] {
-! CHECK:           @_FortranAioBeginExternalListOutput
-! CHECK:           %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref<i32>
-! CHECK:           @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]])
-! CHECK:          } else {
-! CHECK:          }
-! CHECK-NEXT:      omp.yield
+! CHECK:         omp.wsloop {
+! CHECK:           omp.loop_nest (%[[ARG:.*]]) : {{.*}} {
+! CHECK:             fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:             %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
+! CHECK:             %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
+! CHECK:             fir.if %[[COND_XOR]] {
+! CHECK:              @_FortranAioBeginExternalListOutput
+! CHECK:              %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:              @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]])
+! CHECK:             } else {
+! CHECK:             }
+! CHECK-NEXT:        omp.yield
+! CHECK-NEXT:      }
+! CHECK-NEXT:      omp.terminator
+! CHECK-NEXT:    }
+! CHECK:         omp.terminator
 ! CHECK-NEXT:  }
-! CHECK:       omp.terminator
-! CHECK-NEXT:}
 subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs
   !$omp parallel
     do i = 1, 3
@@ -146,20 +155,23 @@ subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs
 
 ! CHECK-LABEL: func @_QPss5() {
 ! CHECK:  omp.parallel  {
-! CHECK:    omp.wsloop {{.*}} {
-! CHECK:      br ^[[BB1:.*]]
-! CHECK:    ^[[BB1]]:
-! CHECK:      br ^[[BB2:.*]]
-! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]]
-! CHECK:    ^[[BB4]]:
-! CHECK:      br ^[[BB6]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      br ^[[BB2]]
-! CHECK:    ^[[BB6]]:
-! CHECK:      omp.yield
+! CHECK:    omp.wsloop {
+! CHECK:      omp.loop_nest {{.*}} {
+! CHECK:        br ^[[BB1:.*]]
+! CHECK:      ^[[BB1]]:
+! CHECK:        br ^[[BB2:.*]]
+! CHECK:      ^[[BB2]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]]
+! CHECK:      ^[[BB4]]:
+! CHECK:        br ^[[BB6]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        br ^[[BB2]]
+! CHECK:      ^[[BB6]]:
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
 ! CHECK:    }
 ! CHECK:    omp.terminator
 ! CHECK:  }
@@ -186,20 +198,23 @@ subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel)
 ! CHECK:  ^[[BB1_OUTER]]:
 ! CHECK:    cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]]
 ! CHECK:  ^[[BB2_OUTER]]:
-! CHECK:    omp.wsloop {{.*}} {
-! CHECK:      br ^[[BB1:.*]]
-! CHECK:    ^[[BB1]]:
-! CHECK:      br ^[[BB2:.*]]
-! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
-! CHECK:    ^[[BB4]]:
-! CHECK:      br ^[[BB6]]
-! CHECK:    ^[[BB5]]
-! CHECK:      br ^[[BB2]]
-! CHECK:    ^[[BB6]]:
-! CHECK:      omp.yield
+! CHECK:    omp.wsloop {
+! CHECK:      omp.loop_nest {{.*}} {
+! CHECK:        br ^[[BB1:.*]]
+! CHECK:      ^[[BB1]]:
+! CHECK:        br ^[[BB2:.*]]
+! CHECK:      ^[[BB2]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
+! CHECK:      ^[[BB4]]:
+! CHECK:        br ^[[BB6]]
+! CHECK:      ^[[BB5]]
+! CHECK:        br ^[[BB2]]
+! CHECK:      ^[[BB6]]:
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
 ! CHECK:    }
 ! CHECK:    br ^[[BB1_OUTER]]
 ! CHECK:  ^[[BB3_OUTER]]:
@@ -230,20 +245,23 @@ subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel)
 ! CHECK:   cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]]
 ! CHECK-NEXT: ^[[BB2_OUTER:.*]]:
 ! CHECK:   omp.parallel  {
-! CHECK:     omp.wsloop {{.*}} {
-! CHECK:       br ^[[BB1:.*]]
-! CHECK-NEXT:     ^[[BB1]]:
-! CHECK:       br ^[[BB2:.*]]
-! CHECK-NEXT:     ^[[BB2]]:
-! CHECK:       cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK-NEXT:     ^[[BB3]]:
-! CHECK:       cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
-! CHECK-NEXT:     ^[[BB4]]:
-! CHECK:       br ^[[BB6]]
-! CHECK-NEXT:     ^[[BB5]]:
-! CHECK:       br ^[[BB2]]
-! CHECK-NEXT:     ^[[BB6]]:
-! CHECK:       omp.yield
+! CHECK:     omp.wsloop {
+! CHECK:       omp.loop_nest {{.*}} {
+! CHECK:         br ^[[BB1:.*]]
+! CHECK-NEXT:       ^[[BB1]]:
+! CHECK:         br ^[[BB2:.*]]
+! CHECK-NEXT:       ^[[BB2]]:
+! CHECK:         cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK-NEXT:       ^[[BB3]]:
+! CHECK:         cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
+! CHECK-NEXT:       ^[[BB4]]:
+! CHECK:         br ^[[BB6]]
+! CHECK-NEXT:       ^[[BB5]]:
+! CHECK:         br ^[[BB2]]
+! CHECK-NEXT:       ^[[BB6]]:
+! CHECK:         omp.yield
+! CHECK:       }
+! CHECK:       omp.terminator
 ! CHECK:     }
 ! CHECK:     omp.terminator
 ! CHECK:   }
@@ -268,20 +286,23 @@ subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop)
 
 ! CHECK-LABEL: func @_QPss8() {
 ! CHECK:  omp.parallel  {
-! CHECK:    omp.wsloop {{.*}} {
-! CHECK:      br ^[[BB1:.*]]
-! CHECK-NEXT:    ^[[BB1]]:
-! CHECK:      br ^[[BB2:.*]]
-! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
-! CHECK:    ^[[BB4]]:
-! CHECK-NEXT:    br ^[[BB6]]
-! CHECK:    ^[[BB5]]:
-! CHECK:      br ^[[BB2]]
-! CHECK-NEXT:    ^[[BB6]]:
-! CHECK:      omp.yield
+! CHECK:    omp.wsloop {
+! CHECK:      omp.loop_nest {{.*}} {
+! CHECK:        br ^[[BB1:.*]]
+! CHECK-NEXT:      ^[[BB1]]:
+! CHECK:        br ^[[BB2:.*]]
+! CHECK:      ^[[BB2]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
+! CHECK:      ^[[BB4]]:
+! CHECK-NEXT:      br ^[[BB6]]
+! CHECK:      ^[[BB5]]:
+! CHECK:        br ^[[BB2]]
+! CHECK-NEXT:      ^[[BB6]]:
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
 ! CHECK:    }
 ! CHECK:    omp.terminator
 ! CHECK:  }
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
index 4030f46299d0b0..e4b85fb447767f 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
@@ -19,11 +19,14 @@ program wsloop
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 4 : i32
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_5]] : i32) nowait for  (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
-! CHECK:           fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref<i32>
-! CHECK:           %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i32>
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
+! CHECK:             fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref<i32>
+! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i32>
+! CHECK:             {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
 
 end do
@@ -37,13 +40,16 @@ program wsloop
 ! CHECK:         %[[VAL_15:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_17:.*]] = arith.constant 4 : i32
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_17]] : i32) nowait for  (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
-! CHECK:           fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
-! CHECK:           %[[VAL_24:.*]] = arith.constant 2 : i32
-! CHECK:           %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
-! CHECK:           %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
+! CHECK:             fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
+! CHECK:             %[[VAL_24:.*]] = arith.constant 2 : i32
+! CHECK:             %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
+! CHECK:             %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32
+! CHECK:             {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
   
 end do
@@ -61,13 +67,16 @@ program wsloop
 ! CHECK:         %[[VAL_30:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_31:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_32]] : i32) nowait for  (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
-! CHECK:           fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
-! CHECK:           %[[VAL_39:.*]] = arith.constant 3 : i32
-! CHECK:           %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
-! CHECK:           %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
+! CHECK:             fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
+! CHECK:             %[[VAL_39:.*]] = arith.constant 3 : i32
+! CHECK:             %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
+! CHECK:             %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32
+! CHECK:             {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
 ! CHECK:         return
 ! CHECK:       }
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90
index 933fc0910e3382..a2ba3ebfe1967d 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90
@@ -39,19 +39,22 @@ program wsloop_collapse
   do i = 1, a
      do j= 1, b
         do k = 1, c
-! CHECK:           omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
-! CHECK:             fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
-! CHECK:             fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
-! CHECK:             fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
-! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_6]] : !fir.ref<i32>
-! CHECK:             %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i32>
-! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[LOAD_IV0]] : i32
-! CHECK:             %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
-! CHECK:             %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[LOAD_IV1]] : i32
-! CHECK:             %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
-! CHECK:             %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[LOAD_IV2]] : i32
-! CHECK:             fir.store %[[VAL_15]] to %[[VAL_6]] : !fir.ref<i32>
-! CHECK:             omp.yield
+! CHECK:           omp.wsloop {
+! CHECK-NEXT:        omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
+! CHECK:               fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
+! CHECK:               fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
+! CHECK:               fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_6]] : !fir.ref<i32>
+! CHECK:               %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i32>
+! CHECK:               %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[LOAD_IV0]] : i32
+! CHECK:               %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[LOAD_IV1]] : i32
+! CHECK:               %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[LOAD_IV2]] : i32
+! CHECK:               fir.store %[[VAL_15]] to %[[VAL_6]] : !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
 ! CHECK:           }
            x = x + i + j + k
         end do
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90
index 1c381475f6cbb1..941885bdb1e384 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90
@@ -11,23 +11,27 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(monotonic:dynamic)
-!CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-!CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-!CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-!CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
+!CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+!CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+!CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+!CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+!CHECK:      omp.wsloop schedule(dynamic, monotonic) nowait {
+!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+!CHECK:          fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
 
   do i=1, 9
     print*, i
-!CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-!CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
+!CHECK:          %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
+!CHECK:          %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+!CHECK:          fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:          fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
-!CHECK:       omp.yield
-!CHECK:       omp.terminator
-!CHECK:     }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
+!CHECK:      omp.terminator
+!CHECK:    }
 
 !$OMP END DO NOWAIT
 !$OMP END PARALLEL
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90
index 3f425200b8fa48..96a3e71f34b1ea 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90
@@ -12,24 +12,27 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(nonmonotonic:dynamic)
-!CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-!CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-!CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-!CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
+!CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+!CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+!CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+!CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+!CHECK:      omp.wsloop schedule(dynamic, nonmonotonic) nowait {
+!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+!CHECK:          fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
 
   do i=1, 9
     print*, i
-!CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-!CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
+!CHECK:          %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
+!CHECK:          %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+!CHECK:          fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:          fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
-!CHECK:       omp.yield
-!CHECK:         }
-!CHECK:       omp.terminator
-!CHECK:     }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
+!CHECK:    omp.terminator
+!CHECK:  }
 
 !$OMP END DO NOWAIT
 !$OMP END PARALLEL
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90
index 7548d7a597228a..fec027608d9913 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90
@@ -6,9 +6,12 @@
 subroutine wsloop_ordered_no_para()
   integer :: a(10), i
 
-! CHECK:  omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
-! CHECK:    omp.yield
-! CHECK:  }
+! CHECK:      omp.wsloop ordered(0) {
+! CHECK-NEXT:   omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+! CHECK:          omp.yield
+! CHECK:        }
+! CHECK:        omp.terminator
+! CHECK:      }
 
   !$omp do ordered
   do i = 2, 10
@@ -25,9 +28,12 @@ subroutine wsloop_ordered_with_para()
   integer :: a(10), i
 
 ! CHECK: func @_QPwsloop_ordered_with_para() {
-! CHECK:  omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
-! CHECK:    omp.yield
-! CHECK:  }
+! CHECK:      omp.wsloop ordered(1) {
+! CHECK-NEXT:   omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+! CHECK:          omp.yield
+! CHECK:        }
+! CHECK:        omp.terminator
+! CHECK:      }
 
   !$omp do ordered(1)
   do i = 2, 10
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
index 08f5a0fcdbae67..b6dfec09007e54 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
@@ -80,13 +80,16 @@
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -116,14 +119,17 @@ subroutine simple_int_reduction
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32
-! CHECK:               %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32
+! CHECK:                 %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -152,13 +158,16 @@ subroutine simple_real_reduction
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -187,14 +196,17 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -229,21 +241,24 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<i32>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32
-! CHECK:               fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32
-! CHECK:               fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32
+! CHECK:                 fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32
+! CHECK:                 fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -282,24 +297,27 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<f32>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<f32>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32
-! CHECK:               %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<f32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref<f32>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32
-! CHECK:               %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref<f32>
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
-! CHECK:               %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32
+! CHECK:                 %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32
+! CHECK:                 %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:                 %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -341,28 +359,31 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref<i32>, @add_reduction_byref_i64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref<i64>, @add_reduction_byref_f32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref<f32>, @add_reduction_byref_f64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref<f64>)  for  (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
-! CHECK:               fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref<i64>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64
-! CHECK:               %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
-! CHECK:               fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref<i64>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref<f32>
-! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32
-! CHECK:               %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref<f32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref<f64>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64
-! CHECK:               %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath<contract> : f64
-! CHECK:               fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref<f64>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref<i32>, @add_reduction_byref_i64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref<i64>, @add_reduction_byref_f32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref<f32>, @add_reduction_byref_f64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref<f64>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
+! CHECK:                 fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref<i64>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64
+! CHECK:                 %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
+! CHECK:                 fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref<i64>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32
+! CHECK:                 %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref<f64>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64
+! CHECK:                 %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath<contract> : f64
+! CHECK:                 fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref<f64>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90
index dc96b875f745f2..e0b9330b1a6d5c 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90
@@ -55,13 +55,16 @@
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>) 
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -91,14 +94,17 @@ subroutine simple_int_reduction
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32
-! CHECK:               %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32
+! CHECK:                 %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -127,13 +133,16 @@ subroutine simple_real_reduction
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -162,14 +171,17 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
-! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
-! CHECK:               %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:                 fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -204,21 +216,24 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<i32>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32
-! CHECK:               fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32
-! CHECK:               fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32
+! CHECK:                 fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32
+! CHECK:                 fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -257,24 +272,27 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<f32>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<f32>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32
-! CHECK:               %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<f32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref<f32>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32
-! CHECK:               %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref<f32>
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
-! CHECK:               %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32
+! CHECK:                 %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32
+! CHECK:                 %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:                 %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -316,28 +334,31 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref<i32>, @add_reduction_i64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref<i64>, @add_reduction_f32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref<f32>, @add_reduction_f64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref<f64>)  for  (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
-! CHECK:               fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref<i64>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64
-! CHECK:               %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
-! CHECK:               fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref<i64>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref<f32>
-! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32
-! CHECK:               %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath<contract> : f32
-! CHECK:               fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref<f32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref<f64>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64
-! CHECK:               %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath<contract> : f64
-! CHECK:               fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref<f64>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref<i32>, @add_reduction_i64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref<i64>, @add_reduction_f32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref<f32>, @add_reduction_f64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref<f64>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
+! CHECK:                 fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref<i64>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64
+! CHECK:                 %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
+! CHECK:                 fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref<i64>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32
+! CHECK:                 %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath<contract> : f32
+! CHECK:                 fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref<f64>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64
+! CHECK:                 %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath<contract> : f64
+! CHECK:                 fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref<f64>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
index 6717597ff3b04d..b25ab84f60fe91 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
@@ -23,7 +23,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop byref reduction(@iand_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: omp.wsloop byref reduction(@iand_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
@@ -31,6 +32,7 @@
 !CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_iand(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90
index 9bc45f9f3a0d87..dfc140d7d5f619 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90
@@ -13,7 +13,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
@@ -21,6 +22,7 @@
 !CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_iand(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
index 1baa59a510fa11..56eb087bae5a08 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
@@ -22,7 +22,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop byref reduction(@ieor_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: omp.wsloop byref reduction(@ieor_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
@@ -30,6 +31,7 @@
 !CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_ieor(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90
index 9c07d5ee20873b..1ddf82b828cb01 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90
@@ -13,7 +13,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
@@ -21,6 +22,7 @@
 !CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_ieor(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
index 5482ef33fc8aa9..e761d24cd303b6 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
@@ -22,7 +22,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>> 
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop byref reduction(@ior_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for 
+!CHECK: omp.wsloop byref reduction(@ior_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
@@ -30,6 +31,7 @@
 !CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_ior(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90
index 79cc8b2d892275..148dbc909babe9 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90
@@ -13,7 +13,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>> 
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for 
+!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
@@ -21,6 +22,7 @@
 !CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_ior(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
index 696ff68b2059cd..17cd02a0ca7ff7 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
@@ -36,21 +36,23 @@
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
-! CHECK:               %[[VAL_14:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
-! CHECK:               %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK:                 %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK:                 %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -78,21 +80,23 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
-! CHECK:               %[[VAL_13:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
-! CHECK:               %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK:                 %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK:                 %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -128,45 +132,47 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
-! CHECK:               %[[VAL_22:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
-! CHECK:               %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
-! CHECK:               %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
-! CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
-! CHECK:               %[[VAL_44:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
-! CHECK:               %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
-! CHECK:               %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:                 %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:                 %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK:                 %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK:                 %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK:                 %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK:                 %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90
index 6dcb3952655eab..e714e45540c393 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90
@@ -30,21 +30,23 @@
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
-! CHECK:               %[[VAL_14:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
-! CHECK:               %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK:                 %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK:                 %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -72,21 +74,23 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
-! CHECK:               %[[VAL_13:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
-! CHECK:               %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK:                 %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK:                 %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -122,45 +126,47 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
-! CHECK:               %[[VAL_22:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
-! CHECK:               %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
-! CHECK:               %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
-! CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
-! CHECK:               %[[VAL_44:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
-! CHECK:               %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
-! CHECK:               %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:                 %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:                 %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK:                 %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK:                 %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK:                 %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK:                 %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
index a31abd0def56e1..89d16c3191b26e 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
@@ -37,21 +37,23 @@
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
-! CHECK:               %[[VAL_14:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
-! CHECK:               %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK:                 %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK:                 %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -79,21 +81,23 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
-! CHECK:               %[[VAL_13:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
-! CHECK:               %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK:                 %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK:                 %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -129,45 +133,47 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
-! CHECK:               %[[VAL_22:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
-! CHECK:               %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
-! CHECK:               %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
-! CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
-! CHECK:               %[[VAL_44:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
-! CHECK:               %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
-! CHECK:               %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:                 %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:                 %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK:                 %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK:                 %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK:                 %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK:                 %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90
index 702c185e25ee40..106e867f367b7d 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90
@@ -31,21 +31,23 @@
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
-! CHECK:               %[[VAL_14:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
-! CHECK:               %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK:                 %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK:                 %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -73,21 +75,23 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
-! CHECK:               %[[VAL_13:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
-! CHECK:               %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK:                 %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK:                 %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -123,45 +127,47 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
-! CHECK:               %[[VAL_22:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
-! CHECK:               %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
-! CHECK:               %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
-! CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
-! CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
-! CHECK:               %[[VAL_44:.*]] = arith.constant 1 : i64
-! CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
-! CHECK:               %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
-! CHECK:               %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
-! CHECK:               fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:                 %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:                 %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK:                 %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK:                 %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK:                 %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK:                 %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK:                 %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK:                 fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
index f0979ab95f568a..a3d193ad6a13dd 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
@@ -32,25 +32,30 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop byref reduction(@max_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
-!CHECK:       %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
-!CHECK:       %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
-!CHECK:       fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:     omp.wsloop byref reduction(@max_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK:         %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK:         %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK:         fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 !CHECK-LABEL: @_QPreduction_max_real
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop byref reduction(@max_byref_f32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
-!CHECK:       %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
-!CHECK:       omp.yield
+!CHECK:     omp.wsloop byref reduction(@max_byref_f32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK:         %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 subroutine reduction_max_int(y)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
index 996296c2adc2b5..fa3840d297bbb8 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
@@ -21,25 +21,30 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
-!CHECK:       %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
-!CHECK:       %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
-!CHECK:       fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:     omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK:         %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK:         %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK:         fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 !CHECK-LABEL: @_QPreduction_max_real
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
-!CHECK:       %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
-!CHECK:       omp.yield
+!CHECK:     omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK:         %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 subroutine reduction_max_int(y)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
index 24aa8e46e5bbbd..f706ffc43aa970 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
@@ -32,26 +32,30 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop byref reduction(@min_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
-!CHECK:       %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
-!CHECK:       %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
-!CHECK:       fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
-!CHECK:       omp.yield
+!CHECK:     omp.wsloop byref reduction(@min_byref_i32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK:         %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK:         %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK:         fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 !CHECK-LABEL: @_QPreduction_min_real
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop byref reduction(@min_byref_f32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
-!CHECK:       %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
-!CHECK:       omp.yield
+!CHECK:     omp.wsloop byref reduction(@min_byref_f32 %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK:         %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 subroutine reduction_min_int(y)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
index 268f51c9dc9330..a373410148cd9d 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
@@ -21,26 +21,30 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
-!CHECK:       %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
-!CHECK:       %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
-!CHECK:       fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
-!CHECK:       omp.yield
+!CHECK:     omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK:         %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK:         %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK:         fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 !CHECK-LABEL: @_QPreduction_min_real
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
 !CHECK:   %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
 !CHECK:   omp.parallel
-!CHECK:     omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
-!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
-!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
-!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
-!CHECK:       %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
-!CHECK:       omp.yield
+!CHECK:     omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>)
+!CHECK-NEXT:  omp.loop_nest
+!CHECK:         %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:         %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:         %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK:         %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK:         omp.yield
+!CHECK:       omp.terminator
 !CHECK:     omp.terminator
 
 subroutine reduction_min_int(y)
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
index 2e3f8ca3c207dd..751e4c8c57094c 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
@@ -11,23 +11,26 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(simd: runtime)
-!CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-!CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-!CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[STORE:.*]] : !fir.ref<i32>
+!CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+!CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+!CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+!CHECK:      omp.wsloop schedule(runtime, simd) nowait {
+!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+!CHECK:          fir.store %[[I]] to %[[STORE:.*]] : !fir.ref<i32>
 
   do i=1, 9
     print*, i
-!CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[STORE]] : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-!CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
+!CHECK:          %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
+!CHECK:          %[[LOAD:.*]] = fir.load %[[STORE]] : !fir.ref<i32>
+!CHECK:          fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:          fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
-!CHECK:       omp.yield
-!CHECK:         }
-!CHECK:       omp.terminator
-!CHECK:     }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
+!CHECK:      omp.terminator
+!CHECK:    }
 
 !$OMP END DO NOWAIT
 !$OMP END PARALLEL
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
index 4f34f30f3e7c98..4bd87601227892 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
@@ -14,26 +14,29 @@ program wsloop_variable
   integer(kind=16) :: i16, i16_lb
   real :: x
 
-!CHECK:  %[[TMP0:.*]] = arith.constant 1 : i32
-!CHECK:  %[[TMP1:.*]] = arith.constant 100 : i32
-!CHECK:  %[[TMP2:.*]] = fir.convert %[[TMP0]] : (i32) -> i64
-!CHECK:  %[[TMP3:.*]] = fir.convert %{{.*}} : (i8) -> i64
-!CHECK:  %[[TMP4:.*]] = fir.convert %{{.*}} : (i16) -> i64
-!CHECK:  %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
-!CHECK:  %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
-!CHECK:  %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:  omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
-!CHECK:    %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
-!CHECK:    fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref<i16>
-!CHECK:    fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
-!CHECK:    %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i16>
-!CHECK:    %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64
-!CHECK:    %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i64>
-!CHECK:    %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64
-!CHECK:    %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32
-!CHECK:    fir.store %[[TMP11]] to %{{.*}} : !fir.ref<f32>
-!CHECK:    omp.yield
-!CHECK:  }
+!CHECK:      %[[TMP0:.*]] = arith.constant 1 : i32
+!CHECK:      %[[TMP1:.*]] = arith.constant 100 : i32
+!CHECK:      %[[TMP2:.*]] = fir.convert %[[TMP0]] : (i32) -> i64
+!CHECK:      %[[TMP3:.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:      %[[TMP4:.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:      %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
+!CHECK:      %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
+!CHECK:      %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
+!CHECK:          fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref<i16>
+!CHECK:          fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
+!CHECK:          %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i16>
+!CHECK:          %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64
+!CHECK:          %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i64>
+!CHECK:          %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64
+!CHECK:          %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32
+!CHECK:          fir.store %[[TMP11]] to %{{.*}} : !fir.ref<f32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
 
   !$omp do collapse(2)
   do i2 = 1, i1_ub, i2_s
@@ -43,18 +46,20 @@ program wsloop_variable
   end do
   !$omp end do
 
-!CHECK:  %[[TMP12:.*]] = arith.constant 1 : i32
-!CHECK:  %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
-!CHECK:  %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
-!CHECK:  omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]])  {
-!CHECK:    %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
-!CHECK:    fir.store %[[ARG0_I16]] to %[[STORE3:.*]] : !fir.ref<i16>
-!CHECK:    %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref<i16>
-!CHECK:    %[[TMP16:.*]] = fir.convert %[[LOAD3]] : (i16) -> f32
-
-!CHECK:    fir.store %[[TMP16]] to %{{.*}} : !fir.ref<f32>
-!CHECK:    omp.yield
-!CHECK:  }
+!CHECK:      %[[TMP12:.*]] = arith.constant 1 : i32
+!CHECK:      %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
+!CHECK:      %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
+!CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
+!CHECK:          fir.store %[[ARG0_I16]] to %[[STORE3:.*]] : !fir.ref<i16>
+!CHECK:          %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref<i16>
+!CHECK:          %[[TMP16:.*]] = fir.convert %[[LOAD3]] : (i16) -> f32
+!CHECK:          fir.store %[[TMP16]] to %{{.*}} : !fir.ref<f32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
 
   !$omp do
   do i2 = 1, i1_ub, i8_s
@@ -62,17 +67,20 @@ program wsloop_variable
   end do
   !$omp end do
 
-!CHECK:  %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
-!CHECK:  %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
-!CHECK:  %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:  omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]])  {
-!CHECK:    %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
-!CHECK:    fir.store %[[ARG1_I128]] to %[[STORE4:.*]] : !fir.ref<i128>
-!CHECK:    %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref<i128>
-!CHECK:    %[[TMP21:.*]] = fir.convert %[[LOAD4]] : (i128) -> f32
-!CHECK:    fir.store %[[TMP21]] to %{{.*}} : !fir.ref<f32>
-!CHECK:    omp.yield
-!CHECK:  }
+!CHECK:      %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:      %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:      %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) {
+!CHECK:          %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
+!CHECK:          fir.store %[[ARG1_I128]] to %[[STORE4:.*]] : !fir.ref<i128>
+!CHECK:          %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref<i128>
+!CHECK:          %[[TMP21:.*]] = fir.convert %[[LOAD4]] : (i128) -> f32
+!CHECK:          fir.store %[[TMP21]] to %{{.*}} : !fir.ref<f32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
 
   !$omp do
   do i16 = i1_lb, i2_ub, i4_s
@@ -97,34 +105,37 @@ end program wsloop_variable
 !CHECK:         %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i16>
 !CHECK:         %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32
 !CHECK:         %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32
-!CHECK:         omp.wsloop   for  (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-!CHECK:           %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
-!CHECK:           fir.store %[[ARG0_I16]] to %[[STORE_IV:.*]] : !fir.ref<i16>
-!CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref<i128>
-!CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i128) -> index
-!CHECK:           %[[VAL_15:.*]] = arith.constant 100 : i32
-!CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index
-!CHECK:           %[[VAL_17:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
-!CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index
-!CHECK:           %[[LB:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
-!CHECK:           %[[VAL_19:.*]]:2 = fir.do_loop %[[VAL_20:[^ ]*]] =
-!CHECK-SAME:          %[[VAL_14]] to %[[VAL_16]] step %[[VAL_18]]
-!CHECK-SAME:          iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i64) {
-!CHECK:             fir.store %[[IV]] to %[[VAL_5]] : !fir.ref<i64>
-!CHECK:             %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i16>
-!CHECK:             %[[VAL_22:.*]] = fir.convert %[[LOAD_IV]] : (i16) -> i64
-!CHECK:             %[[VAL_23:.*]] = fir.load %[[VAL_5]] : !fir.ref<i64>
-!CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_22]], %[[VAL_23]] : i64
-!CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> f32
-!CHECK:             fir.store %[[VAL_25]] to %[[VAL_6]] : !fir.ref<f32>
-!CHECK:             %[[VAL_26:.*]] = arith.addi %[[VAL_20]], %[[VAL_18]] : index
-!CHECK:             %[[STEPCAST:.*]] = fir.convert %[[VAL_18]] : (index) -> i64
-!CHECK:             %[[IVLOAD:.*]] = fir.load %[[VAL_5]] : !fir.ref<i64>
-!CHECK:             %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
-!CHECK:             fir.result %[[VAL_26]], %[[IVINC]] : index, i64
+!CHECK:         omp.wsloop {
+!CHECK-NEXT:      omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+!CHECK:             %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
+!CHECK:             fir.store %[[ARG0_I16]] to %[[STORE_IV:.*]] : !fir.ref<i16>
+!CHECK:             %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref<i128>
+!CHECK:             %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i128) -> index
+!CHECK:             %[[VAL_15:.*]] = arith.constant 100 : i32
+!CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index
+!CHECK:             %[[VAL_17:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
+!CHECK:             %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index
+!CHECK:             %[[LB:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+!CHECK:             %[[VAL_19:.*]]:2 = fir.do_loop %[[VAL_20:[^ ]*]] =
+!CHECK-SAME:            %[[VAL_14]] to %[[VAL_16]] step %[[VAL_18]]
+!CHECK-SAME:            iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i64) {
+!CHECK:               fir.store %[[IV]] to %[[VAL_5]] : !fir.ref<i64>
+!CHECK:               %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i16>
+!CHECK:               %[[VAL_22:.*]] = fir.convert %[[LOAD_IV]] : (i16) -> i64
+!CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_5]] : !fir.ref<i64>
+!CHECK:               %[[VAL_24:.*]] = arith.addi %[[VAL_22]], %[[VAL_23]] : i64
+!CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> f32
+!CHECK:               fir.store %[[VAL_25]] to %[[VAL_6]] : !fir.ref<f32>
+!CHECK:               %[[VAL_26:.*]] = arith.addi %[[VAL_20]], %[[VAL_18]] : index
+!CHECK:               %[[STEPCAST:.*]] = fir.convert %[[VAL_18]] : (index) -> i64
+!CHECK:               %[[IVLOAD:.*]] = fir.load %[[VAL_5]] : !fir.ref<i64>
+!CHECK:               %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
+!CHECK:               fir.result %[[VAL_26]], %[[IVINC]] : index, i64
+!CHECK:             }
+!CHECK:             fir.store %[[VAL_19]]#1 to %[[VAL_5]] : !fir.ref<i64>
+!CHECK:             omp.yield
 !CHECK:           }
-!CHECK:           fir.store %[[VAL_19]]#1 to %[[VAL_5]] : !fir.ref<i64>
-!CHECK:           omp.yield
+!CHECK:           omp.terminator
 !CHECK:         }
 
 subroutine wsloop_variable_sub
@@ -146,16 +157,19 @@ subroutine wsloop_variable_sub
 !CHECK:         %[[C1:.*]] = arith.constant 1 : i32
 !CHECK:         %[[C10:.*]] = arith.constant 10 : i32
 !CHECK:         %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:         omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) {
-!CHECK:           %[[ARG0_I8:.*]] = fir.convert %[[ARG0]] : (i32) -> i8
-!CHECK:           fir.store %[[ARG0_I8]] to %[[IV2]] : !fir.ref<i8>
-!CHECK:           %[[IV2LOAD:.*]] = fir.load %[[IV2]] : !fir.ref<i8>
-!CHECK:           %[[J1LOAD:.*]] = fir.load %[[J1]] : !fir.ref<i8>
-!CHECK:           %[[VAL_27:.*]] = arith.cmpi eq, %[[IV2LOAD]], %[[J1LOAD]] : i8
-!CHECK:           fir.if %[[VAL_27]] {
-!CHECK:           } else {
+!CHECK:         omp.wsloop {
+!CHECK-NEXT:      omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) {
+!CHECK:             %[[ARG0_I8:.*]] = fir.convert %[[ARG0]] : (i32) -> i8
+!CHECK:             fir.store %[[ARG0_I8]] to %[[IV2]] : !fir.ref<i8>
+!CHECK:             %[[IV2LOAD:.*]] = fir.load %[[IV2]] : !fir.ref<i8>
+!CHECK:             %[[J1LOAD:.*]] = fir.load %[[J1]] : !fir.ref<i8>
+!CHECK:             %[[VAL_27:.*]] = arith.cmpi eq, %[[IV2LOAD]], %[[J1LOAD]] : i8
+!CHECK:             fir.if %[[VAL_27]] {
+!CHECK:             } else {
+!CHECK:             }
+!CHECK:             omp.yield
 !CHECK:           }
-!CHECK:           omp.yield
+!CHECK:           omp.terminator
 !CHECK:         }
 
   j1 = 5
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90
index abc0489b08ff55..c9e428abdb440e 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90
@@ -7,21 +7,23 @@ subroutine simple_loop
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
-  ! CHECK:             fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:          fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
+  ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:          omp.yield
+  ! CHECK:        omp.terminator
   !$OMP END DO
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL
 end subroutine
 
@@ -30,21 +32,23 @@ subroutine simple_loop_with_step
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 2 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-  ! CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 2 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+  ! CHECK:          fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
   !$OMP DO
   do i=1, 9, 2
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:          omp.yield
+  ! CHECK:        omp.terminator
   !$OMP END DO
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL
 end subroutine
 
@@ -53,20 +57,22 @@ subroutine loop_with_schedule_nowait
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop schedule(runtime) nowait {
+  ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
-  ! CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:          fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:          omp.yield
+  ! CHECK:        omp.terminator
   !$OMP END DO NOWAIT
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL
 end subroutine
diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
index 5c624d31b5f36d..c245137f16c7af 100644
--- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
+++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
@@ -12,7 +12,8 @@
 ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32
 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32
 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop   for  (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) {
+! CHECK: omp.wsloop {
+! CHECK-NEXT: omp.loop_nest (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) {
 ! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref<i32>
 ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref<i32>
 ! CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
@@ -24,6 +25,8 @@
 ! CHECK: }
 ! CHECK: omp.terminator
 ! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
 subroutine nested_default_clause()
 	integer x, y, z
 	!$omp parallel do default(private)
diff --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90
index 895e1abd274f30..dda563303148bb 100644
--- a/flang/test/Lower/OpenMP/copyin.f90
+++ b/flang/test/Lower/OpenMP/copyin.f90
@@ -156,10 +156,13 @@ subroutine copyin_derived_type()
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_11]]) to (%[[VAL_12]]) inclusive step (%[[VAL_13]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
-! CHECK:               fir.call @_QPsub4(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_11]]) to (%[[VAL_12]]) inclusive step (%[[VAL_13]]) {
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
+! CHECK:                 fir.call @_QPsub4(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -320,13 +323,16 @@ subroutine common_1()
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<i32>
 ! CHECK:             %[[VAL_36:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop for  (%[[VAL_37:.*]]) : i32 = (%[[VAL_34]]) to (%[[VAL_35]]) inclusive step (%[[VAL_36]]) {
-! CHECK:               fir.store %[[VAL_37]] to %[[VAL_20]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_40:.*]] = arith.addi %[[VAL_38]], %[[VAL_39]] : i32
-! CHECK:               hlfir.assign %[[VAL_40]] to %[[VAL_31]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_37:.*]]) : i32 = (%[[VAL_34]]) to (%[[VAL_35]]) inclusive step (%[[VAL_36]]) {
+! CHECK:                 fir.store %[[VAL_37]] to %[[VAL_20]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_40:.*]] = arith.addi %[[VAL_38]], %[[VAL_39]] : i32
+! CHECK:                 hlfir.assign %[[VAL_40]] to %[[VAL_31]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90
index 1167ba7e6ae0d4..6a91927ab02dba 100644
--- a/flang/test/Lower/OpenMP/default-clause-byref.f90
+++ b/flang/test/Lower/OpenMP/default-clause-byref.f90
@@ -352,10 +352,13 @@ subroutine skipped_default_clause_checks()
        type(it)::iii
 
 !CHECK: omp.parallel {
-!CHECK: omp.wsloop byref reduction(@min_byref_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for (%[[ARG:.*]]) {{.*}} {
+!CHECK: omp.wsloop byref reduction(@min_byref_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK-NEXT: omp.loop_nest (%[[ARG:.*]]) {{.*}} {
 !CHECK: omp.yield
 !CHECK: }
 !CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
 !CHECK: }
        !$omp parallel do default(private) REDUCTION(MIN:z)
          do i = 1, 10
diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90
index f86b51aef4e026..1b79755a06ebe8 100644
--- a/flang/test/Lower/OpenMP/default-clause.f90
+++ b/flang/test/Lower/OpenMP/default-clause.f90
@@ -352,10 +352,13 @@ subroutine skipped_default_clause_checks()
        type(it)::iii
 
 !CHECK: omp.parallel {
-!CHECK: omp.wsloop reduction(@min_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for (%[[ARG:.*]]) {{.*}} {
+!CHECK: omp.wsloop reduction(@min_i32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK-NEXT: omp.loop_nest (%[[ARG:.*]]) {{.*}} {
 !CHECK: omp.yield
 !CHECK: }
 !CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
 !CHECK: }
        !$omp parallel do default(private) REDUCTION(MIN:z)
          do i = 1, 10
diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90
index b6be77fe3016d1..fea05ae3d6bce3 100644
--- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90
+++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90
@@ -11,17 +11,19 @@ subroutine simple_loop
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
   ! CHECK-DAG:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[IV:.*]]    = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]])
+  ! CHECK:         %[[IV:.*]]    = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  ! CHECK:         omp.wsloop {
+  ! CHECK-NEXT:      omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) {
   !$OMP DO
   do i=1, 9
   ! CHECK:             fir.store %[[I]] to %[[IV:.*]] : !fir.ref<i32>
   ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:             fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:             omp.yield
+  ! CHECK:           omp.terminator
   !$OMP END DO
-  ! CHECK:       omp.terminator
+  ! CHECK:         omp.terminator
   !$OMP END PARALLEL
 end subroutine
diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90
index a11bdee156637b..78adf09c6fe345 100644
--- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90
+++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90
@@ -2,35 +2,38 @@
 
 !CHECK: fir.global common @[[CB_C:.*]](dense<0> : vector<8xi8>) : !fir.array<8xi8>
 !CHECK-LABEL: func.func @_QPlastprivate_common
-!CHECK:    %[[CB_C_REF:.*]] = fir.address_of(@[[CB_C]]) : !fir.ref<!fir.array<8xi8>>
-!CHECK:    %[[CB_C_REF_CVT:.*]] = fir.convert %[[CB_C_REF]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
-!CHECK:    %[[CB_C_X_COOR:.*]] = fir.coordinate_of %[[CB_C_REF_CVT]], %{{.*}} : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
-!CHECK:    %[[CB_C_X_ADDR:.*]] = fir.convert %[[CB_C_X_COOR]] : (!fir.ref<i8>) -> !fir.ref<f32>
-!CHECK:    %[[X_DECL:.*]]:2 = hlfir.declare %[[CB_C_X_ADDR]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:    %[[CB_C_REF_CVT:.*]] = fir.convert %[[CB_C_REF]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
-!CHECK:    %[[CB_C_Y_COOR:.*]] = fir.coordinate_of %[[CB_C_REF_CVT]], %{{.*}} : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
-!CHECK:    %[[CB_C_Y_ADDR:.*]] = fir.convert %[[CB_C_Y_COOR]] : (!fir.ref<i8>) -> !fir.ref<f32>
-!CHECK:    %[[Y_DECL:.*]]:2 = hlfir.declare %[[CB_C_Y_ADDR]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:    %[[PRIVATE_X_REF:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivate_commonEx"}
-!CHECK:    %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:    %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"}
-!CHECK:    %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:    omp.wsloop   for  (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
-!CHECK:      %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32
-!CHECK:      %[[C0:.*]] = arith.constant 0 : i32
-!CHECK:      %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
-!CHECK:      %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
-!CHECK:      %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
-!CHECK:      %[[LAST_ITER:.*]] = arith.select %[[NEG_STEP]], %[[V_LT]], %[[V_GT]] : i1
-!CHECK:      fir.if %[[LAST_ITER]] {
-!CHECK:        fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
-!CHECK:        %[[PRIVATE_X_VAL:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>
-!CHECK:        hlfir.assign %[[PRIVATE_X_VAL]] to %[[X_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
-!CHECK:        %[[PRIVATE_Y_VAL:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<f32>
-!CHECK:        hlfir.assign %[[PRIVATE_Y_VAL]] to %[[Y_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK:      %[[CB_C_REF:.*]] = fir.address_of(@[[CB_C]]) : !fir.ref<!fir.array<8xi8>>
+!CHECK:      %[[CB_C_REF_CVT:.*]] = fir.convert %[[CB_C_REF]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+!CHECK:      %[[CB_C_X_COOR:.*]] = fir.coordinate_of %[[CB_C_REF_CVT]], %{{.*}} : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+!CHECK:      %[[CB_C_X_ADDR:.*]] = fir.convert %[[CB_C_X_COOR]] : (!fir.ref<i8>) -> !fir.ref<f32>
+!CHECK:      %[[X_DECL:.*]]:2 = hlfir.declare %[[CB_C_X_ADDR]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      %[[CB_C_REF_CVT:.*]] = fir.convert %[[CB_C_REF]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+!CHECK:      %[[CB_C_Y_COOR:.*]] = fir.coordinate_of %[[CB_C_REF_CVT]], %{{.*}} : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+!CHECK:      %[[CB_C_Y_ADDR:.*]] = fir.convert %[[CB_C_Y_COOR]] : (!fir.ref<i8>) -> !fir.ref<f32>
+!CHECK:      %[[Y_DECL:.*]]:2 = hlfir.declare %[[CB_C_Y_ADDR]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      %[[PRIVATE_X_REF:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivate_commonEx"}
+!CHECK:      %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"}
+!CHECK:      %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+!CHECK:          %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32
+!CHECK:          %[[C0:.*]] = arith.constant 0 : i32
+!CHECK:          %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
+!CHECK:          %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
+!CHECK:          %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
+!CHECK:          %[[LAST_ITER:.*]] = arith.select %[[NEG_STEP]], %[[V_LT]], %[[V_GT]] : i1
+!CHECK:          fir.if %[[LAST_ITER]] {
+!CHECK:            fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
+!CHECK:            %[[PRIVATE_X_VAL:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>
+!CHECK:            hlfir.assign %[[PRIVATE_X_VAL]] to %[[X_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK:            %[[PRIVATE_Y_VAL:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<f32>
+!CHECK:            hlfir.assign %[[PRIVATE_Y_VAL]] to %[[Y_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK:          }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
 !CHECK:      }
-!CHECK:      omp.yield
-!CHECK:    }
 subroutine lastprivate_common
   common /c/ x, y
   real x, y
diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90
index 70fe500129d128..24c20281b9c389 100644
--- a/flang/test/Lower/OpenMP/lastprivate-iv.f90
+++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90
@@ -2,28 +2,31 @@
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
 !CHECK-LABEL: func @_QPlastprivate_iv_inc
-!CHECK:    %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_incEi"}
-!CHECK:    %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[LB:.*]] = arith.constant 4 : i32
-!CHECK:    %[[UB:.*]] = arith.constant 10 : i32
-!CHECK:    %[[STEP:.*]]  = arith.constant 3 : i32
-!CHECK:    omp.wsloop for  (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-!CHECK:      fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
-!CHECK:      %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
-!CHECK:      %[[C0:.*]] = arith.constant 0 : i32
-!CHECK:      %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32
-!CHECK:      %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32
-!CHECK:      %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32
-!CHECK:      %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1
-!CHECK:      fir.if %[[CMP]] {
-!CHECK:        fir.store %[[V]] to %[[I]]#1 : !fir.ref<i32>
-!CHECK:        %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
-!CHECK:        hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:      %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:      %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_incEi"}
+!CHECK:      %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      %[[LB:.*]] = arith.constant 4 : i32
+!CHECK:      %[[UB:.*]] = arith.constant 10 : i32
+!CHECK:      %[[STEP:.*]]  = arith.constant 3 : i32
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+!CHECK:          fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
+!CHECK:          %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
+!CHECK:          %[[C0:.*]] = arith.constant 0 : i32
+!CHECK:          %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32
+!CHECK:          %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32
+!CHECK:          %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32
+!CHECK:          %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1
+!CHECK:          fir.if %[[CMP]] {
+!CHECK:            fir.store %[[V]] to %[[I]]#1 : !fir.ref<i32>
+!CHECK:            %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
+!CHECK:            hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:          }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
 !CHECK:      }
-!CHECK:      omp.yield
-!CHECK:    }
 subroutine lastprivate_iv_inc()
   integer :: i
 
@@ -34,28 +37,31 @@ subroutine lastprivate_iv_inc()
 end subroutine
 
 !CHECK-LABEL: func @_QPlastprivate_iv_dec
-!CHECK:    %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_decEi"}
-!CHECK:    %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[LB:.*]] = arith.constant 10 : i32
-!CHECK:    %[[UB:.*]] = arith.constant 1 : i32
-!CHECK:    %[[STEP:.*]]  = arith.constant -3 : i32
-!CHECK:    omp.wsloop for  (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-!CHECK:      fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
-!CHECK:      %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
-!CHECK:      %[[C0:.*]] = arith.constant 0 : i32
-!CHECK:      %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32
-!CHECK:      %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32
-!CHECK:      %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32
-!CHECK:      %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1
-!CHECK:      fir.if %[[CMP]] {
-!CHECK:        fir.store %[[V]] to %[[I]]#1 : !fir.ref<i32>
-!CHECK:        %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
-!CHECK:        hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:      %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:      %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_decEi"}
+!CHECK:      %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      %[[LB:.*]] = arith.constant 10 : i32
+!CHECK:      %[[UB:.*]] = arith.constant 1 : i32
+!CHECK:      %[[STEP:.*]]  = arith.constant -3 : i32
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+!CHECK:          fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
+!CHECK:          %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
+!CHECK:          %[[C0:.*]] = arith.constant 0 : i32
+!CHECK:          %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32
+!CHECK:          %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32
+!CHECK:          %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32
+!CHECK:          %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1
+!CHECK:          fir.if %[[CMP]] {
+!CHECK:            fir.store %[[V]] to %[[I]]#1 : !fir.ref<i32>
+!CHECK:            %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
+!CHECK:            hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:          }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
 !CHECK:      }
-!CHECK:      omp.yield
-!CHECK:    }
 subroutine lastprivate_iv_dec()
   integer :: i
 
diff --git a/flang/test/Lower/OpenMP/location.f90 b/flang/test/Lower/OpenMP/location.f90
index 1e01a4828dd9e1..5d340937a81ce0 100644
--- a/flang/test/Lower/OpenMP/location.f90
+++ b/flang/test/Lower/OpenMP/location.f90
@@ -28,11 +28,14 @@ subroutine sub_target()
 
 !CHECK-LABEL: sub_loop
 subroutine sub_loop()
-!CHECK: omp.wsloop {{.*}}  {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest {{.*}} {
   !$omp do
   do i=1,10
     print *, i
 !CHECK:   omp.yield loc(#[[LOOP_LOC:.*]])
+!CHECK: } loc(#[[LOOP_LOC]])
+!CHECK:   omp.terminator loc(#[[LOOP_LOC]])
 !CHECK: } loc(#[[LOOP_LOC]])
   end do
   !$omp end do
@@ -60,9 +63,9 @@ subroutine sub_if(c)
 
 !CHECK: #[[PAR_LOC]] = loc("{{.*}}location.f90":9:9)
 !CHECK: #[[TAR_LOC]] = loc("{{.*}}location.f90":21:9)
-!CHECK: #[[LOOP_LOC]] = loc("{{.*}}location.f90":32:9)
-!CHECK: #[[BAR_LOC]] = loc("{{.*}}location.f90":44:9)
-!CHECK: #[[TW_LOC]] = loc("{{.*}}location.f90":46:9)
-!CHECK: #[[TY_LOC]] = loc("{{.*}}location.f90":48:9)
-!CHECK: #[[IF_LOC]] = loc("{{.*}}location.f90":55:14)
-!CHECK: #[[TASK_LOC]] = loc("{{.*}}location.f90":55:9)
+!CHECK: #[[LOOP_LOC]] = loc("{{.*}}location.f90":33:9)
+!CHECK: #[[BAR_LOC]] = loc("{{.*}}location.f90":47:9)
+!CHECK: #[[TW_LOC]] = loc("{{.*}}location.f90":49:9)
+!CHECK: #[[TY_LOC]] = loc("{{.*}}location.f90":51:9)
+!CHECK: #[[IF_LOC]] = loc("{{.*}}location.f90":58:14)
+!CHECK: #[[TASK_LOC]] = loc("{{.*}}location.f90":58:9)
diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
index 28f59c95d60bbe..bb81e5eac62f56 100644
--- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
@@ -14,8 +14,9 @@
 !CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
 
 ! Check that we are accessing the clone inside the loop
-!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
-!CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: %[[UNIT:.*]] = arith.constant 6 : i32
 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX
 !CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] 
 !CHECK-NEXT: %[[CNST:.*]] = arith.constant
@@ -36,9 +37,12 @@
 !CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
 
 ! Testing lastprivate val update
-!CHECK-DAG: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>
-!CHECK-DAG: } 
-!CHECK-DAG: omp.yield
+!CHECK: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>
+!CHECK: } 
+!CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine lastprivate_character(arg1)
         character(5) :: arg1
@@ -57,7 +61,8 @@ subroutine lastprivate_character(arg1)
 !CHECK-DAG: omp.parallel  {
 !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -72,8 +77,11 @@ subroutine lastprivate_character(arg1)
 ! Testing lastprivate val update
 !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE_DECL]]#0 : !fir.ref<i32>
 !CHECK:      hlfir.assign %[[CLONE_LD]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK-DAG: }
-!CHECK-DAG: omp.yield
+!CHECK: }
+!CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine lastprivate_int(arg1)
         integer :: arg1
@@ -96,7 +104,8 @@ subroutine lastprivate_int(arg1)
 !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -114,6 +123,9 @@ subroutine lastprivate_int(arg1)
 !CHECK-DAG: hlfir.assign %[[CLONE_LD2]] to %[[ARG2_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK: }
 !CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine mult_lastprivate_int(arg1, arg2)
         integer :: arg1, arg2
@@ -137,7 +149,8 @@ subroutine mult_lastprivate_int(arg1, arg2)
 !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 !Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -155,6 +168,9 @@ subroutine mult_lastprivate_int(arg1, arg2)
 !CHECK-DAG: hlfir.assign %[[CLONE_LD1]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK: }
 !CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
 
 subroutine mult_lastprivate_int2(arg1, arg2)
         integer :: arg1, arg2
@@ -183,7 +199,8 @@ subroutine mult_lastprivate_int2(arg1, arg2)
 !CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK-NOT: omp.barrier
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
@@ -199,6 +216,9 @@ subroutine mult_lastprivate_int2(arg1, arg2)
 !CHECK-NEXT: hlfir.assign %[[CLONE_LD]] to %[[ARG2_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK-NEXT: }
 !CHECK-NEXT: omp.yield
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
 
 subroutine firstpriv_lastpriv_int(arg1, arg2)
         integer :: arg1, arg2
@@ -223,7 +243,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
 !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
 !CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK-NEXT: omp.barrier
-!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+!CHECK: omp.wsloop {
+!CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 ! Testing last iteration check
 !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
 !CHECK: %[[C0:.*]] = arith.constant 0 : i32
@@ -238,6 +259,9 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
 !CHECK-NEXT: hlfir.assign %[[CLONE_LD]] to %[[ARG1_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK-NEXT: }
 !CHECK-NEXT: omp.yield
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
 
 subroutine firstpriv_lastpriv_int2(arg1)
         integer :: arg1
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
index 8533106b7ac487..93809fde98a269 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
@@ -21,30 +21,33 @@
 ! CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref<i32>
 ! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:           omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
-! CHECK:             fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref<i32>
-! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
-! CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
-! CHECK:             %[[VAL_9:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref<i32>
-! CHECK:             %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
-! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : index
-! CHECK:             %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
-! CHECK:             %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] =
-! CHECK-SAME:            %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]]
-! CHECK-SAME:            iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
-! CHECK:               fir.store %[[IV]] to %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
-! CHECK:               %[[LOAD:.*]] = fir.load %[[PRIV_I_DECL]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[PRIV_J_DECL]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[PRIV_X_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index
-! CHECK:               %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
-! CHECK:               %[[IVLOAD:.*]] = fir.load %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
-! CHECK:               %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
-! CHECK:               fir.result %[[VAL_17]], %[[IVINC]] : index, i32
+! CHECK:           omp.wsloop {
+! CHECK-NEXT:        omp.loop_nest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
+! CHECK:               fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:               %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+! CHECK:               %[[VAL_9:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+! CHECK:               %[[VAL_11:.*]] = arith.constant 1 : index
+! CHECK:               %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
+! CHECK:               %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] =
+! CHECK-SAME:              %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]]
+! CHECK-SAME:              iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
+! CHECK:                 fir.store %[[IV]] to %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
+! CHECK:                 %[[LOAD:.*]] = fir.load %[[PRIV_I_DECL]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[PRIV_J_DECL]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[PRIV_X_DECL]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index
+! CHECK:                 %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+! CHECK:                 %[[IVLOAD:.*]] = fir.load %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
+! CHECK:                 %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
+! CHECK:                 fir.result %[[VAL_17]], %[[IVINC]] : index, i32
+! CHECK:               }
+! CHECK:               fir.store %[[VAL_12]]#1 to %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
+! CHECK:               omp.yield
 ! CHECK:             }
-! CHECK:             fir.store %[[VAL_12]]#1 to %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
-! CHECK:             omp.yield
+! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.terminator
 ! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
index 775f7b4f2cb106..b9b58a135aaa2c 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -292,33 +292,35 @@ subroutine simple_loop_1
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL PRIVATE(r)
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 
-  ! FIRDialect:     %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsimple_loop_1Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsimple_loop_1Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
 
-  ! FIRDialect:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! FIRDialect:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! FIRDialect:     %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! FIRDialect:      omp.wsloop {
+  ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}} : (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref<i32>
+  ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
+  ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}} : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! FIRDialect:     omp.yield
-  ! FIRDialect:     {{%.*}} = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.if {{%.*}} {
-  ! FIRDialect:     [[LD:%.*]] = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
-  ! FIRDialect:     fir.freemem [[AD]] : !fir.heap<f32>
-  ! FIRDialect:     fir.store {{%.*}} to %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      omp.yield
+  ! FIRDialect:      omp.terminator
+  ! FIRDialect:      {{%.*}} = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.if {{%.*}} {
+  ! FIRDialect:      [[LD:%.*]] = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:      fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:      fir.store {{%.*}} to %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END DO
   ! FIRDialect:  omp.terminator
   !$OMP END PARALLEL
@@ -330,19 +332,20 @@ subroutine simple_loop_2
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 
-  ! FIRDialect:     %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      %[[R_DECL:.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
 
-  ! FIRDialect:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! FIRDialect:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! FIRDialect:     %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! FIRDialect:      omp.wsloop {
+  ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO PRIVATE(r)
   do i=1, 9
   ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref<i32>
@@ -351,6 +354,7 @@ subroutine simple_loop_2
     print*, i
   end do
   ! FIRDialect:     omp.yield
+  ! FIRDialect:     omp.terminator
   ! FIRDialect:     {{%.*}} = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
   ! FIRDialect:     fir.if {{%.*}} {
   ! FIRDialect:     [[LD:%.*]] = fir.load %[[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
@@ -367,33 +371,35 @@ subroutine simple_loop_3
   integer :: i
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! FIRDialect:     %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! FIRDialect:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! FIRDialect:      %[[ALLOCA_IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "{{.*}}Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 
-  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+  ! FIRDialect:      [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}Er"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
 
-  ! FIRDialect:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! FIRDialect:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! FIRDialect:     %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! FIRDialect:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! FIRDialect:      %[[WS_STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! FIRDialect:      omp.wsloop {
+  ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:      fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref<i32>
+  ! FIRDialect:      %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV_DECL]]#0 : !fir.ref<i32>
+  ! FIRDialect:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! FIRDialect:     omp.yield
-  ! FIRDialect:     {{%.*}} = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     fir.if {{%.*}} {
-  ! FIRDialect:     [[LD:%.*]] = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! FIRDialect:     [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
-  ! FIRDialect:     fir.freemem [[AD]] : !fir.heap<f32>
-  ! FIRDialect:     fir.store {{%.*}} to [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      omp.yield
+  ! FIRDialect:      omp.terminator
+  ! FIRDialect:      {{%.*}} = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      fir.if {{%.*}} {
+  ! FIRDialect:      [[LD:%.*]] = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:      [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  ! FIRDialect:      fir.freemem [[AD]] : !fir.heap<f32>
+  ! FIRDialect:      fir.store {{%.*}} to [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
   !$OMP END PARALLEL DO
   ! FIRDialect:  omp.terminator
 end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
index 716a7d71bb6288..ac8b9f50f54e67 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
@@ -20,10 +20,14 @@ subroutine omp_do_firstprivate(a)
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK-NEXT: omp.wsloop   for  (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  ! CHECK-NEXT: omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK-NEXT: omp.yield
+  ! CHECK-NEXT: }
+  ! CHECK-NEXT: omp.terminator
+  ! CHECK-NEXT: }
     do i=1, a
       call foo(i, a)
     end do
@@ -56,10 +60,12 @@ subroutine omp_do_firstprivate2(a, n)
   ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref<i32>
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop   for  (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  ! CHECK: omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
   ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK: omp.yield
+  ! CHECK: omp.terminator
     do i= a, n
       call foo(i, a)
     end do
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90
index c06f941b74b582..602b3d1c05f0de 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90
@@ -6,19 +6,21 @@
 subroutine simple_parallel_do
   integer :: i
   ! CHECK:  omp.parallel
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.yield
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -34,19 +36,21 @@ subroutine parallel_do_with_parallel_clauses(cond, nt)
   ! CHECK:  %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
   ! CHECK:  %[[NT:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref<i32>
   ! CHECK:  omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.yield
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -58,19 +62,21 @@ subroutine parallel_do_with_clauses(nt)
   integer :: i
   ! CHECK:  %[[NT:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref<i32>
   ! CHECK:  omp.parallel num_threads(%[[NT]] : i32)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop schedule(dynamic) {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.yield
+  ! CHECK:      omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -88,20 +94,21 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   integer :: nt
   integer :: i
   ! CHECK:  omp.parallel
-  ! CHECK:    %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
-  ! CHECK:    %[[PRIVATE_COND_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_COND_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  ! CHECK:    %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
-  ! CHECK:    %[[PRIVATE_NT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_NT_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:    %[[NT_VAL:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref<i32>
-  ! CHECK:    hlfir.assign %[[NT_VAL]] to %[[PRIVATE_NT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-  ! CHECK:    %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:    %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:    %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:    omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
+  ! CHECK:      %[[PRIVATE_COND_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_COND_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  ! CHECK:      %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
+  ! CHECK:      %[[PRIVATE_NT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_NT_REF]] {uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK:      %[[NT_VAL:.*]] = fir.load %[[NT_DECL]]#0 : !fir.ref<i32>
+  ! CHECK:      hlfir.assign %[[NT_VAL]] to %[[PRIVATE_NT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
+  ! CHECK:      fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref<i32>
+  ! CHECK:      %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]]#0 : !fir.ref<i32>
   ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
   ! CHECK:      %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_DECL]]#0 : !fir.ref<!fir.logical<4>>
   ! CHECK:      %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
@@ -112,6 +119,7 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   end do
   ! CHECK:      omp.yield
   ! CHECK:    omp.terminator
+  ! CHECK:    omp.terminator
   !$OMP END PARALLEL DO
 end subroutine
 
@@ -150,10 +158,13 @@ end subroutine parallel_private_do
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
-! CHECK:               fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
+! CHECK:                 fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -196,10 +207,13 @@ end subroutine omp_parallel_multiple_firstprivate_do
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
-! CHECK:               fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
+! CHECK:                 fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -241,10 +255,13 @@ end subroutine parallel_do_private
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
-! CHECK:               fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
+! CHECK:                 fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -271,9 +288,9 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK-LABEL:   func.func @_QPomp_parallel_do_multiple_firstprivate(
 ! CHECK-SAME:                                                        %[[A_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "a"},
 ! CHECK-SAME:                                                        %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
-! CHECK:            %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:            %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>
-! CHECK:           omp.parallel   {
+! CHECK:           %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>
+! CHECK:           omp.parallel {
 ! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ADDR]] {uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:             %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"}
@@ -287,12 +304,15 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
-! CHECK:               fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:         omp.loop_nest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
+! CHECK:                 fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
-! CHECK:           return
+! CHECK:          return
 ! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90
index fdbabc21b2c9e9..4f3819c5e4eb7c 100644
--- a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90
+++ b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90
@@ -82,24 +82,27 @@ subroutine test_stop_in_region3()
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 10 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:         omp.wsloop   for  (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
-! CHECK:           fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref<i32>
-! CHECK:           cf.br ^bb1
-! CHECK:         ^bb1:
-! CHECK:           %[[VAL_7:.*]] = arith.constant 3 : i32
-! CHECK:           hlfir.assign %[[VAL_7]] to %[[VAL_2_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_2_DECL]]#0 : !fir.ref<i32>
-! CHECK:           %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:           %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : i32
-! CHECK:           cf.cond_br %[[VAL_10]], ^bb2, ^bb3
-! CHECK:         ^bb2:
-! CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_2_DECL]]#0 : !fir.ref<i32>
-! CHECK:           %[[VAL_12:.*]] = arith.constant false
-! CHECK:           %[[VAL_13:.*]] = arith.constant false
-! CHECK:           %[[VAL_14:.*]] = fir.call @_FortranAStopStatement(%[[VAL_11]], %[[VAL_12]], %[[VAL_13]]) {{.*}} : (i32, i1, i1) -> none
-! CHECK:           omp.yield
-! CHECK:         ^bb3:
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop {
+! CHECK-NEXT:      omp.loop_nest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
+! CHECK:             fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref<i32>
+! CHECK:             cf.br ^bb1
+! CHECK:           ^bb1:
+! CHECK:             %[[VAL_7:.*]] = arith.constant 3 : i32
+! CHECK:             hlfir.assign %[[VAL_7]] to %[[VAL_2_DECL]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_8:.*]] = fir.load %[[VAL_2_DECL]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : i32
+! CHECK:             cf.cond_br %[[VAL_10]], ^bb2, ^bb3
+! CHECK:           ^bb2:
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_2_DECL]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = arith.constant false
+! CHECK:             %[[VAL_13:.*]] = arith.constant false
+! CHECK:             %[[VAL_14:.*]] = fir.call @_FortranAStopStatement(%[[VAL_11]], %[[VAL_12]], %[[VAL_13]]) {{.*}} : (i32, i1, i1) -> none
+! CHECK:             omp.yield
+! CHECK:           ^bb3:
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
 ! CHECK:         cf.br ^bb1
 ! CHECK:       ^bb1:
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index 6f72b5a34d069a..1dcf6cfdc87680 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -594,7 +594,8 @@ subroutine omp_target_parallel_do
       !$omp target parallel do map(tofrom: a)
          !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
          !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-         !CHECK: omp.wsloop for  (%[[I_VAL:.*]]) : i32
+         !CHECK: omp.wsloop {
+         !CHECK-NEXT: omp.loop_nest (%[[I_VAL:.*]]) : i32
          do i = 1, 1024
            !CHECK:   fir.store %[[I_VAL]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
            !CHECK:   %[[C10:.*]] = arith.constant 10 : i32
@@ -606,6 +607,8 @@ subroutine omp_target_parallel_do
          end do
          !CHECK: omp.yield
          !CHECK: }
+         !CHECK: omp.terminator
+         !CHECK: }
       !CHECK: omp.terminator
       !CHECK: }
    !CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90
index e5bf980ce90fd0..6a1331799d5477 100644
--- a/flang/test/Lower/OpenMP/unstructured.f90
+++ b/flang/test/Lower/OpenMP/unstructured.f90
@@ -70,27 +70,33 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct
 ! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb3
 ! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb4
 ! CHECK:   ^bb2:  // pred: ^bb1
-! CHECK:     omp.wsloop for (%[[ARG1:.*]]) : {{.*}} {
-! CHECK:       fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref<i32>
-! CHECK:     @_FortranAioBeginExternalListOutput
-! CHECK:       %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref<i32>
-! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]])
-! CHECK:       omp.yield
+! CHECK:     omp.wsloop {
+! CHECK:       omp.loop_nest (%[[ARG1:.*]]) : {{.*}} {
+! CHECK:         fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref<i32>
+! CHECK:         @_FortranAioBeginExternalListOutput
+! CHECK:         %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref<i32>
+! CHECK:         @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]])
+! CHECK:         omp.yield
+! CHECK:       }
+! CHECK:       omp.terminator
 ! CHECK:     }
-! CHECK:     omp.wsloop for (%[[ARG2:.*]]) : {{.*}} {
-! CHECK:       fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref<i32>
-! CHECK:       br ^bb1
-! CHECK:     ^bb2:  // 2 preds: ^bb1, ^bb5
-! CHECK:       cond_br %{{[0-9]*}}, ^bb3, ^bb6
-! CHECK:     ^bb3:  // pred: ^bb2
-! CHECK:       cond_br %{{[0-9]*}}, ^bb4, ^bb5
-! CHECK:     ^bb4:  // pred: ^bb3
-! CHECK:       @_FortranAioBeginExternalListOutput
-! CHECK:       %[[LOAD_2:.*]] = fir.load %[[K_DECL]]#0 : !fir.ref<i32>
-! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]])
-! CHECK:       br ^bb2
-! CHECK:     ^bb6:  // 2 preds: ^bb2, ^bb4
-! CHECK:       omp.yield
+! CHECK:     omp.wsloop {
+! CHECK:       omp.loop_nest (%[[ARG2:.*]]) : {{.*}} {
+! CHECK:         fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref<i32>
+! CHECK:         br ^bb1
+! CHECK:       ^bb2:  // 2 preds: ^bb1, ^bb5
+! CHECK:         cond_br %{{[0-9]*}}, ^bb3, ^bb6
+! CHECK:       ^bb3:  // pred: ^bb2
+! CHECK:         cond_br %{{[0-9]*}}, ^bb4, ^bb5
+! CHECK:       ^bb4:  // pred: ^bb3
+! CHECK:         @_FortranAioBeginExternalListOutput
+! CHECK:         %[[LOAD_2:.*]] = fir.load %[[K_DECL]]#0 : !fir.ref<i32>
+! CHECK:         @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]])
+! CHECK:         br ^bb2
+! CHECK:       ^bb6:  // 2 preds: ^bb2, ^bb4
+! CHECK:         omp.yield
+! CHECK:       }
+! CHECK:       omp.terminator
 ! CHECK:     }
 ! CHECK:     br ^bb1
 ! CHECK:   ^bb4:  // pred: ^bb1
@@ -121,20 +127,23 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs
 ! CHECK:       omp.parallel {
 ! CHECK:         %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned}
 ! CHECK:         %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:         omp.wsloop for (%[[ARG:.*]]) : {{.*}} {
-! CHECK:           fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref<i32>
-! CHECK:           %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
-! CHECK:           %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
-! CHECK:          fir.if %[[COND_XOR]] {
-! CHECK:           @_FortranAioBeginExternalListOutput
-! CHECK:           %[[LOAD:.*]] = fir.load %[[OMP_LOOP_J_DECL]]#0 : !fir.ref<i32>
-! CHECK:           @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]])
-! CHECK:          } else {
-! CHECK:          }
-! CHECK-NEXT:      omp.yield
+! CHECK:         omp.wsloop {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG:.*]]) : {{.*}} {
+! CHECK:             fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref<i32>
+! CHECK:             %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
+! CHECK:             %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
+! CHECK:             fir.if %[[COND_XOR]] {
+! CHECK:              @_FortranAioBeginExternalListOutput
+! CHECK:              %[[LOAD:.*]] = fir.load %[[OMP_LOOP_J_DECL]]#0 : !fir.ref<i32>
+! CHECK:              @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]])
+! CHECK:             } else {
+! CHECK:             }
+! CHECK-NEXT:        omp.yield
+! CHECK-NEXT:      }
+! CHECK-NEXT:      omp.terminator
+! CHECK-NEXT:    }
+! CHECK:         omp.terminator
 ! CHECK-NEXT:  }
-! CHECK:       omp.terminator
-! CHECK-NEXT:}
 subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs
   !$omp parallel
     do i = 1, 3
@@ -150,20 +159,23 @@ subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs
 
 ! CHECK-LABEL: func @_QPss5() {
 ! CHECK:  omp.parallel  {
-! CHECK:    omp.wsloop {{.*}} {
-! CHECK:      br ^[[BB1:.*]]
-! CHECK:    ^[[BB1]]:
-! CHECK:      br ^[[BB2:.*]]
-! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]]
-! CHECK:    ^[[BB4]]:
-! CHECK:      br ^[[BB6]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      br ^[[BB2]]
-! CHECK:    ^[[BB6]]:
-! CHECK:      omp.yield
+! CHECK:    omp.wsloop {
+! CHECK:      omp.loop_nest {{.*}} {
+! CHECK:        br ^[[BB1:.*]]
+! CHECK:      ^[[BB1]]:
+! CHECK:        br ^[[BB2:.*]]
+! CHECK:      ^[[BB2]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]]
+! CHECK:      ^[[BB4]]:
+! CHECK:        br ^[[BB6]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        br ^[[BB2]]
+! CHECK:      ^[[BB6]]:
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
 ! CHECK:    }
 ! CHECK:    omp.terminator
 ! CHECK:  }
@@ -190,20 +202,23 @@ subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel)
 ! CHECK:  ^[[BB1_OUTER]]:
 ! CHECK:    cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]]
 ! CHECK:  ^[[BB2_OUTER]]:
-! CHECK:    omp.wsloop {{.*}} {
-! CHECK:      br ^[[BB1:.*]]
-! CHECK:    ^[[BB1]]:
-! CHECK:      br ^[[BB2:.*]]
-! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
-! CHECK:    ^[[BB4]]:
-! CHECK:      br ^[[BB6]]
-! CHECK:    ^[[BB5]]
-! CHECK:      br ^[[BB2]]
-! CHECK:    ^[[BB6]]:
-! CHECK:      omp.yield
+! CHECK:    omp.wsloop {
+! CHECK:      omp.loop_nest {{.*}} {
+! CHECK:        br ^[[BB1:.*]]
+! CHECK:      ^[[BB1]]:
+! CHECK:        br ^[[BB2:.*]]
+! CHECK:      ^[[BB2]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
+! CHECK:      ^[[BB4]]:
+! CHECK:        br ^[[BB6]]
+! CHECK:      ^[[BB5]]
+! CHECK:        br ^[[BB2]]
+! CHECK:      ^[[BB6]]:
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
 ! CHECK:    }
 ! CHECK:    br ^[[BB1_OUTER]]
 ! CHECK:  ^[[BB3_OUTER]]:
@@ -234,20 +249,23 @@ subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel)
 ! CHECK:   cond_br %{{.*}}, ^[[BB2_OUTER:.*]], ^[[BB3_OUTER:.*]]
 ! CHECK-NEXT: ^[[BB2_OUTER:.*]]:
 ! CHECK:   omp.parallel  {
-! CHECK:     omp.wsloop {{.*}} {
-! CHECK:       br ^[[BB1:.*]]
-! CHECK-NEXT:     ^[[BB1]]:
-! CHECK:       br ^[[BB2:.*]]
-! CHECK-NEXT:     ^[[BB2]]:
-! CHECK:       cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK-NEXT:     ^[[BB3]]:
-! CHECK:       cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
-! CHECK-NEXT:     ^[[BB4]]:
-! CHECK:       br ^[[BB6]]
-! CHECK-NEXT:     ^[[BB5]]:
-! CHECK:       br ^[[BB2]]
-! CHECK-NEXT:     ^[[BB6]]:
-! CHECK:       omp.yield
+! CHECK:     omp.wsloop {
+! CHECK:       omp.loop_nest {{.*}} {
+! CHECK:         br ^[[BB1:.*]]
+! CHECK-NEXT:       ^[[BB1]]:
+! CHECK:         br ^[[BB2:.*]]
+! CHECK-NEXT:       ^[[BB2]]:
+! CHECK:         cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK-NEXT:       ^[[BB3]]:
+! CHECK:         cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
+! CHECK-NEXT:       ^[[BB4]]:
+! CHECK:         br ^[[BB6]]
+! CHECK-NEXT:       ^[[BB5]]:
+! CHECK:         br ^[[BB2]]
+! CHECK-NEXT:       ^[[BB6]]:
+! CHECK:         omp.yield
+! CHECK:       }
+! CHECK:       omp.terminator
 ! CHECK:     }
 ! CHECK:     omp.terminator
 ! CHECK:   }
@@ -272,20 +290,23 @@ subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop)
 
 ! CHECK-LABEL: func @_QPss8() {
 ! CHECK:  omp.parallel  {
-! CHECK:    omp.wsloop {{.*}} {
-! CHECK:      br ^[[BB1:.*]]
-! CHECK-NEXT:    ^[[BB1]]:
-! CHECK:      br ^[[BB2:.*]]
-! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
-! CHECK:    ^[[BB3]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
-! CHECK:    ^[[BB4]]:
-! CHECK-NEXT:    br ^[[BB6]]
-! CHECK:    ^[[BB5]]:
-! CHECK:      br ^[[BB2]]
-! CHECK-NEXT:    ^[[BB6]]:
-! CHECK:      omp.yield
+! CHECK:    omp.wsloop {
+! CHECK:      omp.loop_nest {{.*}} {
+! CHECK:        br ^[[BB1:.*]]
+! CHECK-NEXT:      ^[[BB1]]:
+! CHECK:        br ^[[BB2:.*]]
+! CHECK:      ^[[BB2]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
+! CHECK:      ^[[BB3]]:
+! CHECK:        cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
+! CHECK:      ^[[BB4]]:
+! CHECK-NEXT:      br ^[[BB6]]
+! CHECK:      ^[[BB5]]:
+! CHECK:        br ^[[BB2]]
+! CHECK-NEXT:      ^[[BB6]]:
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
 ! CHECK:    }
 ! CHECK:    omp.terminator
 ! CHECK:  }
diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90
index 5016c8985bda04..fa6ec219a490eb 100644
--- a/flang/test/Lower/OpenMP/wsloop-chunks.f90
+++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90
@@ -20,11 +20,14 @@ program wsloop
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 4 : i32
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_5]] : i32) nowait for  (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
-! CHECK:           fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref<i32>
-! CHECK:           %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref<i32>
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
+! CHECK:             fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref<i32>
+! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref<i32>
+! CHECK:             {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
 
 end do
@@ -38,13 +41,16 @@ program wsloop
 ! CHECK:         %[[VAL_15:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_17:.*]] = arith.constant 4 : i32
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_17]] : i32) nowait for  (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
-! CHECK:           fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref<i32>
-! CHECK:           %[[VAL_24:.*]] = arith.constant 2 : i32
-! CHECK:           %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref<i32>
-! CHECK:           %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
+! CHECK:             fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref<i32>
+! CHECK:             %[[VAL_24:.*]] = arith.constant 2 : i32
+! CHECK:             %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32
+! CHECK:             {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
   
 end do
@@ -62,13 +68,16 @@ program wsloop
 ! CHECK:         %[[VAL_30:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_31:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref<i32>
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_32]] : i32) nowait for  (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
-! CHECK:           fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref<i32>
-! CHECK:           %[[VAL_39:.*]] = arith.constant 3 : i32
-! CHECK:           %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref<i32>
-! CHECK:           %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-! CHECK:           omp.yield
+! CHECK:         omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait {
+! CHECK-NEXT:      omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
+! CHECK:             fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref<i32>
+! CHECK:             %[[VAL_39:.*]] = arith.constant 3 : i32
+! CHECK:             %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32
+! CHECK:             {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:             omp.yield
+! CHECK:           }
+! CHECK:           omp.terminator
 ! CHECK:         }
 ! CHECK:         return
 ! CHECK:       }
diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90
index c93fcf4ef968da..d9541e176f6a81 100644
--- a/flang/test/Lower/OpenMP/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90
@@ -49,23 +49,26 @@ program wsloop_collapse
 !CHECK:           %[[VAL_30:.*]] = arith.constant 1 : i32
 !CHECK:           %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
 !CHECK:           %[[VAL_32:.*]] = arith.constant 1 : i32
-!CHECK:           omp.wsloop   for  (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) {
+!CHECK:           omp.wsloop {
+!CHECK-NEXT:        omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) {
   !$omp do collapse(3)
   do i = 1, a
      do j= 1, b
         do k = 1, c
-!CHECK:             fir.store %[[VAL_33]] to %[[VAL_5]]#1 : !fir.ref<i32>
-!CHECK:             fir.store %[[VAL_34]] to %[[VAL_3]]#1 : !fir.ref<i32>
-!CHECK:             fir.store %[[VAL_35]] to %[[VAL_1]]#1 : !fir.ref<i32>
-!CHECK:             %[[VAL_36:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_37:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_38:.*]] = arith.addi %[[VAL_36]], %[[VAL_37]] : i32
-!CHECK:             %[[VAL_39:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_40:.*]] = arith.addi %[[VAL_38]], %[[VAL_39]] : i32
-!CHECK:             %[[VAL_41:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_42:.*]] = arith.addi %[[VAL_40]], %[[VAL_41]] : i32
-!CHECK:             hlfir.assign %[[VAL_42]] to %[[VAL_19]]#0 : i32, !fir.ref<i32>
-!CHECK:             omp.yield
+!CHECK:               fir.store %[[VAL_33]] to %[[VAL_5]]#1 : !fir.ref<i32>
+!CHECK:               fir.store %[[VAL_34]] to %[[VAL_3]]#1 : !fir.ref<i32>
+!CHECK:               fir.store %[[VAL_35]] to %[[VAL_1]]#1 : !fir.ref<i32>
+!CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_37:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_38:.*]] = arith.addi %[[VAL_36]], %[[VAL_37]] : i32
+!CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_40:.*]] = arith.addi %[[VAL_38]], %[[VAL_39]] : i32
+!CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_42:.*]] = arith.addi %[[VAL_40]], %[[VAL_41]] : i32
+!CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_19]]#0 : i32, !fir.ref<i32>
+!CHECK:               omp.yield
+!CHECK-NEXT:        }
+!CHECK-NEXT:        omp.terminator
            x = x + i + j + k
         end do
      end do
diff --git a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/wsloop-monotonic.f90
index fba9105b981813..531d995052f6c4 100644
--- a/flang/test/Lower/OpenMP/wsloop-monotonic.f90
+++ b/flang/test/Lower/OpenMP/wsloop-monotonic.f90
@@ -15,19 +15,21 @@ program wsloop_dynamic
 !CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref<i32>
+!CHECK:     omp.wsloop schedule(dynamic, monotonic) nowait {
+!CHECK-NEXT:  omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+!CHECK:         fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref<i32>
 
   do i=1, 9
     print*, i
-!CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]]#0 : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-!CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
+!CHECK:         %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
+!CHECK:         %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]]#0 : !fir.ref<i32>
+!CHECK:         fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:         fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
-!CHECK:       omp.yield
+!CHECK:         omp.yield
 !CHECK:       omp.terminator
-!CHECK:     }
+!CHECK:     omp.terminator
+!CHECK:   }
 
 !$OMP END DO NOWAIT
 !$OMP END PARALLEL
diff --git a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90
index 1bd7a2edc0f523..420bc0bffaece3 100644
--- a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90
+++ b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90
@@ -17,20 +17,23 @@ program wsloop_dynamic
 !CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref<i32>
+!CHECK:     omp.wsloop schedule(dynamic, nonmonotonic) nowait {
+!CHECK-NEXT:  omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+!CHECK:         fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref<i32>
 
   do i=1, 9
     print*, i
-!CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]]#0 : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-!CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
+!CHECK:         %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
+!CHECK:         %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]]#0 : !fir.ref<i32>
+!CHECK:         fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:         fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
-!CHECK:       omp.yield
-!CHECK:         }
+!CHECK:         omp.yield
+!CHECK:       }
 !CHECK:       omp.terminator
 !CHECK:     }
+!CHECK:     omp.terminator
+!CHECK:   }
 
 !$OMP END DO NOWAIT
 !$OMP END PARALLEL
diff --git a/flang/test/Lower/OpenMP/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/wsloop-ordered.f90
index 5185d2d085bac7..f4fa81c52315c8 100644
--- a/flang/test/Lower/OpenMP/wsloop-ordered.f90
+++ b/flang/test/Lower/OpenMP/wsloop-ordered.f90
@@ -6,9 +6,12 @@
 subroutine wsloop_ordered_no_para()
   integer :: a(10), i
 
-! CHECK:  omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
-! CHECK:    omp.yield
-! CHECK:  }
+! CHECK:  omp.wsloop ordered(0) {
+! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
+! CHECK:    }
 
   !$omp do ordered
   do i = 2, 10
@@ -25,9 +28,12 @@ subroutine wsloop_ordered_with_para()
   integer :: a(10), i
 
 ! CHECK: func @_QPwsloop_ordered_with_para() {
-! CHECK:  omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
-! CHECK:    omp.yield
-! CHECK:  }
+! CHECK:  omp.wsloop ordered(1) {
+! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+! CHECK:        omp.yield
+! CHECK:      }
+! CHECK:      omp.terminator
+! CHECK:    }
 
   !$omp do ordered(1)
   do i = 2, 10
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
index e63db33bbe2505..c9d03435d9e18c 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
@@ -82,14 +82,17 @@
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -122,15 +125,18 @@ subroutine simple_int_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
-! CHECK:               %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK:                 %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -163,14 +169,17 @@ subroutine simple_real_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -202,15 +211,18 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -250,24 +262,27 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32
-! CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32
-! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
-! CHECK:               hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @add_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK:                 hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK:                 hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:                 hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -311,27 +326,30 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
-! CHECK:               %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
-! CHECK:               %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
-! CHECK:               %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @add_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:                 %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK:                 %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK:                 %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -379,32 +397,35 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @add_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @add_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @add_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>)  for  (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:               fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-! CHECK:               %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
-! CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
-! CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
-! CHECK:               %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
-! CHECK:               %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
-! CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @add_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @add_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @add_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:                 fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK:                 %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK:                 %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK:                 %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK:                 %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK:                 hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
index 3b4d9666c69373..6a09fece80ae9d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
@@ -31,14 +31,16 @@
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]])
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
index 7c9070592e468e..c5cc5a95cef177 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
@@ -27,14 +27,16 @@
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]])
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
index 11e1ffb79f8e4e..5b957959f40d50 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
@@ -58,14 +58,17 @@
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -98,15 +101,18 @@ subroutine simple_int_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
-! CHECK:               %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK:                 %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -139,14 +145,17 @@ subroutine simple_real_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -178,15 +187,18 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -226,24 +238,27 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32
-! CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32
-! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
-! CHECK:               hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @add_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK:                 hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK:                 hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:                 hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -287,27 +302,30 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
-! CHECK:               %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
-! CHECK:               %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
-! CHECK:               %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @add_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:                 %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK:                 %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK:                 %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
@@ -355,32 +373,35 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @add_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @add_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @add_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>)  for  (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:               fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-! CHECK:               %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
-! CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
-! CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
-! CHECK:               %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
-! CHECK:               %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
-! CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@add_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @add_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @add_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @add_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:                 fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK:                 %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK:                 %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK:                 %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK:                 %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK:                 hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
index a20ed1ca83ce2b..7e5d4cb40a7b91 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90
@@ -63,22 +63,25 @@ program reduce
 ! CHECK:             %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#1(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
 ! CHECK:             %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
 ! CHECK:             fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:               %[[VAL_18:.*]] = arith.constant 1 : index
-! CHECK:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = arith.constant 0 : i32
-! CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] : i32
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:               %[[VAL_24:.*]] = arith.constant 2 : index
-! CHECK:               %[[VAL_25:.*]] = hlfir.designate %[[VAL_23]] (%[[VAL_24]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
-! CHECK:               hlfir.assign %[[VAL_22]] to %[[VAL_25]] : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_17:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:                 %[[VAL_18:.*]] = arith.constant 1 : index
+! CHECK:                 %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = arith.constant 0 : i32
+! CHECK:                 %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] : i32
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:                 %[[VAL_24:.*]] = arith.constant 2 : index
+! CHECK:                 %[[VAL_25:.*]] = hlfir.designate %[[VAL_23]] (%[[VAL_24]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_22]] to %[[VAL_25]] : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
index 61599876da8ea4..03f08d8207308a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
@@ -63,30 +63,33 @@ program reduce
 ! CHECK:             %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#1(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
 ! CHECK:             %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
 ! CHECK:             fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
-! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
-! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:               %[[VAL_17:.*]] = arith.constant 1 : index
-! CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_17]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_20]] : i32
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:               %[[VAL_23:.*]] = arith.constant 1 : index
-! CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_22]] (%[[VAL_23]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_24]] : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:               %[[VAL_26:.*]] = arith.constant 2 : index
-! CHECK:               %[[VAL_27:.*]] = hlfir.designate %[[VAL_25]] (%[[VAL_26]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = arith.subi %[[VAL_28]], %[[VAL_29]] : i32
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
-! CHECK:               %[[VAL_32:.*]] = arith.constant 2 : index
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_31]] (%[[VAL_32]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_33]] : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:                 %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
+! CHECK:                 fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:                 %[[VAL_17:.*]] = arith.constant 1 : index
+! CHECK:                 %[[VAL_18:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_17]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_20]] : i32
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:                 %[[VAL_23:.*]] = arith.constant 1 : index
+! CHECK:                 %[[VAL_24:.*]] = hlfir.designate %[[VAL_22]] (%[[VAL_23]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_24]] : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:                 %[[VAL_26:.*]] = arith.constant 2 : index
+! CHECK:                 %[[VAL_27:.*]] = hlfir.designate %[[VAL_25]] (%[[VAL_26]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = arith.subi %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
+! CHECK:                 %[[VAL_32:.*]] = arith.constant 2 : index
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_31]] (%[[VAL_32]])  : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_33]] : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
index e3f06a446ed4c1..40280c56dad6b3 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
@@ -35,17 +35,19 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@iand_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@iand_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90
index 746617e210624b..986892d3584f94 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90
@@ -29,17 +29,19 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@iand_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@iand_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
index 7e3a283bf783c8..ee33ce2f348d87 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
@@ -28,9 +28,10 @@
 !CHECK: omp.parallel
 !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: omp.wsloop byref reduction(@ieor_byref_i32 %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for
-!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
+!CHECK: omp.wsloop byref reduction(@ieor_byref_i32 %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
 !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64
 !CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
@@ -40,6 +41,7 @@
 !CHECK: hlfir.assign %[[RES]] to %[[PRV_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_ieor(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90
index 11245c4ac95e03..b362731b33710a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90
@@ -19,9 +19,10 @@
 !CHECK: omp.parallel
 !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for
-!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
+!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>)
+!CHECK-NEXT: omp.loop_nest
 !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
 !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64
 !CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
@@ -31,6 +32,7 @@
 !CHECK: hlfir.assign %[[RES]] to %[[PRV_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
+!CHECK: omp.terminator
 
 subroutine reduction_ieor(y)
   integer :: x, y(:)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
index c7f8e8bdede548..0052773bb5adc6 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
@@ -33,17 +33,19 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@ior_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]])
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@ior_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90
index dd0bbeb1a0761f..f32be43b9b71a5 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90
@@ -29,17 +29,19 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@ior_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]])
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@ior_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
index 59411534e4a5c0..dfc018ed7c5aa8 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
@@ -42,20 +42,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -90,20 +92,22 @@ end subroutine simple_reduction
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -147,42 +151,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90
index 9ca733281c2f03..c529bd4755b6c6 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90
@@ -36,20 +36,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -84,20 +86,22 @@ end subroutine simple_reduction
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -141,42 +145,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
index 1d6e1b0545c3bc..a54795a4446f47 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
@@ -42,20 +42,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -89,20 +91,22 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -146,42 +150,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90
index a1bfa462cd599a..1021b5926b9179 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90
@@ -36,20 +36,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -83,20 +85,22 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -140,42 +144,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
index a94b67a97832fc..854cb19ecd750c 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
@@ -42,20 +42,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -90,20 +92,22 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -149,42 +153,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 ! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90
index 08d6a2efd39936..f5c84aaaf4858b 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90
@@ -36,20 +36,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -84,20 +86,22 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -143,42 +147,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 ! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
index ca69ccee4a38e3..e268c6ff6cf51e 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
@@ -41,20 +41,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -88,20 +90,22 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -145,42 +149,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90
index c4bf8e9d65ae7b..26dc0c327aad1a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90
@@ -36,20 +36,22 @@
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -83,20 +85,22 @@ subroutine simple_reduction(y)
 ! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
-! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
-! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
-! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:                 %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:                 %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:                 %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -140,42 +144,44 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
-! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
-! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1
-! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
-! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1
-! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
-! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
-! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
-! CHECK:               %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1
-! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
-! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:                 fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:                 %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:                 %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:                 %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:                 hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
index ee562bbe15863e..d004c9a99482e7 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
@@ -46,18 +46,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@max_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@max_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 ! CHECK-LABEL:   func.func @_QPreduction_max_real(
@@ -75,18 +77,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           omp.parallel {
 ! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
@@ -94,24 +98,26 @@
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>)  for  (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:               fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
-! CHECK:               %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
-! CHECK:               fir.if %[[VAL_43]] {
-! CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
-! CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
-! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
-! CHECK:               } else {
-! CHECK:               }
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@max_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK:                 %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK:                 fir.if %[[VAL_43]] {
+! CHECK:                   %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK:                   %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                   %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK:                   hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK:                 } else {
+! CHECK:                 }
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
index 10bba6ac4b51bd..352888bb94f512 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
@@ -33,18 +33,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@max_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@max_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90
index 5ea5d6626f186d..f4caea5a269a18 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90
@@ -29,18 +29,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@max_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@max_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
index 6f11f0ec96a7d3..a925570c56b861 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
@@ -40,18 +40,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@max_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@max_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 ! CHECK-LABEL:   func.func @_QPreduction_max_real(
@@ -69,18 +71,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           omp.parallel {
 ! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
@@ -88,24 +92,26 @@
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>)  for  (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:               fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
-! CHECK:               %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
-! CHECK:               fir.if %[[VAL_43]] {
-! CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
-! CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
-! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
-! CHECK:               } else {
-! CHECK:               }
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@max_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK:                 %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK:                 fir.if %[[VAL_43]] {
+! CHECK:                   %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK:                   %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                   %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK:                   hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK:                 } else {
+! CHECK:                 }
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
index c0372117a03b9d..aae5c536894f98 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
@@ -46,18 +46,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@min_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@min_byref_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 ! CHECK-LABEL:   func.func @_QPreduction_min_real(
@@ -75,19 +77,21 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
-! CHECK:             }
+! CHECK:             omp.wsloop byref reduction(@min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.parallel {
@@ -96,24 +100,26 @@
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>)  for  (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:               fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
-! CHECK:               %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
-! CHECK:               fir.if %[[VAL_43]] {
-! CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
-! CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
-! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
-! CHECK:               } else {
-! CHECK:               }
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@min_byref_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK:                 %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK:                 fir.if %[[VAL_43]] {
+! CHECK:                   %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK:                   %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                   %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK:                   hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK:                 } else {
+! CHECK:                 }
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
index 2c694f82e279a4..7958c65005562d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
@@ -40,18 +40,20 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@min_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@min_i32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 ! CHECK-LABEL:   func.func @_QPreduction_min_real(
@@ -69,19 +71,21 @@
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
-! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
-! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
-! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
-! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
-! CHECK:             }
+! CHECK:             omp.wsloop reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.parallel {
@@ -90,24 +94,26 @@
 ! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
 ! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>)  for  (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
-! CHECK:               fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
-! CHECK:               %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
-! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
-! CHECK:               fir.if %[[VAL_43]] {
-! CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
-! CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
-! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
-! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
-! CHECK:               } else {
-! CHECK:               }
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@min_f32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK:                 %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK:                 %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK:                 fir.if %[[VAL_43]] {
+! CHECK:                   %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK:                   %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                   %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK:                   hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK:                 } else {
+! CHECK:                 }
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
index 0138a957820615..a4c99f190dd2e5 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
@@ -39,12 +39,14 @@ program reduce
 ! CHECK:             %[[VAL_6:.*]] = arith.constant 0 : i32
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@min_i32 %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref<i32>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
-! CHECK:               hlfir.assign %[[VAL_12]] to %[[VAL_11]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
-! CHECK:             }
+! CHECK:             omp.wsloop reduction(@min_i32 %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:                 %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_10]] to %[[VAL_5]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_12:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_12]] to %[[VAL_11]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
index a2829948d472a8..7c538cdd470f8b 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
@@ -85,14 +85,16 @@
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -121,15 +123,17 @@ subroutine simple_int_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
-! CHECK:               %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK:                 %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -158,14 +162,16 @@ subroutine simple_real_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -194,15 +200,17 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -239,24 +247,26 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @multiply_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32
-! CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32
-! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32
-! CHECK:               hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @multiply_reduction_byref_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK:                 hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK:                 hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:                 hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -297,27 +307,29 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @multiply_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @multiply_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
-! CHECK:               %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
-! CHECK:               %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
-! CHECK:               %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @multiply_reduction_byref_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @multiply_reduction_byref_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:                 %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK:                 %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK:                 %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -362,32 +374,34 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @multiply_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @multiply_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @multiply_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>)  for  (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:               fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-! CHECK:               %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
-! CHECK:               %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
-! CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
-! CHECK:               %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
-! CHECK:               %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
-! CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_byref_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @multiply_reduction_byref_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @multiply_reduction_byref_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @multiply_reduction_byref_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:                 fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK:                 %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK:                 %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK:                 %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK:                 %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK:                 hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
index 90d9aa5e839bde..08be4d84c1a62f 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
@@ -60,14 +60,16 @@
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -96,15 +98,17 @@ subroutine simple_int_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
-! CHECK:               %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK:                 %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -133,14 +137,16 @@ subroutine simple_real_reduction
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
-! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:                 hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -169,15 +175,17 @@ subroutine simple_int_reduction_switch_order
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
-! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:                 %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -214,24 +222,26 @@ subroutine simple_real_reduction_switch_order
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @multiply_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32
-! CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32
-! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32
-! CHECK:               hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @multiply_reduction_i32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK:                 hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK:                 hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:                 hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -272,27 +282,29 @@ subroutine multiple_int_reductions_same_type
 ! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @multiply_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @multiply_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
-! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
-! CHECK:               %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
-! CHECK:               %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
-! CHECK:               %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_f32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @multiply_reduction_f32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @multiply_reduction_f32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:                 %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK:                 %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK:                 %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
@@ -337,32 +349,34 @@ subroutine multiple_real_reductions_same_type
 ! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_17:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
-! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @multiply_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @multiply_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @multiply_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>)  for  (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
-! CHECK:               fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
-! CHECK:               %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-! CHECK:               %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32
-! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
-! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
-! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
-! CHECK:               %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64
-! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
-! CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
-! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
-! CHECK:               %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
-! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
-! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
-! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
-! CHECK:               %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
-! CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
-! CHECK:               omp.yield
+! CHECK:             omp.wsloop reduction(@multiply_reduction_i32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @multiply_reduction_i64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @multiply_reduction_f32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @multiply_reduction_f64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>) {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK:                 %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:                 %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:                 %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:                 fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:                 hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK:                 %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK:                 %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK:                 hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK:                 %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK:                 %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK:                 %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK:                 %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK:                 hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK:                 omp.yield
+! CHECK:               omp.terminator
 ! CHECK:             omp.terminator
 ! CHECK:           return
 
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
index 9e9951c399c920..429253efdc8090 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
@@ -35,31 +35,34 @@
 !CHECK: }
 
 !CHECK-LABEL: func.func @_QPmultiple_reduction
-!CHECK:  %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductionEx"}
-!CHECK:  %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFmultiple_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:  %[[Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_reductionEy"}
-!CHECK:  %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFmultiple_reductionEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:  %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductionEz"}
-!CHECK:  %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFmultiple_reductionEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:  omp.wsloop reduction(
+!CHECK:      %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductionEx"}
+!CHECK:      %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFmultiple_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      %[[Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_reductionEy"}
+!CHECK:      %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFmultiple_reductionEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:      %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductionEz"}
+!CHECK:      %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFmultiple_reductionEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      omp.wsloop reduction(
 !CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:.+]] : !fir.ref<i32>,
 !CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:.+]] : !fir.ref<f32>,
-!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:.+]] : !fir.ref<i32>) {{.*}}{
-!CHECK:    %[[PRV_X_DECL:.+]]:2 = hlfir.declare %[[PRV_X]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[PRV_Y_DECL:.+]]:2 = hlfir.declare %[[PRV_Y]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:    %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[LPRV_X:.+]] = fir.load %[[PRV_X_DECL]]#0 : !fir.ref<i32>
-!CHECK:    %[[RES_X:.+]] = arith.addi %[[LPRV_X]], %{{.+}} : i32
-!CHECK:    hlfir.assign %[[RES_X]] to %[[PRV_X_DECL]]#0 : i32, !fir.ref<i32>
-!CHECK:    %[[LPRV_Y:.+]] = fir.load %[[PRV_Y_DECL]]#0 : !fir.ref<f32>
-!CHECK:    %[[RES_Y:.+]] = arith.addf %[[LPRV_Y]], %{{.+}} : f32
-!CHECK:    hlfir.assign %[[RES_Y]] to %[[PRV_Y_DECL]]#0 : f32, !fir.ref<f32>
-!CHECK:    %[[LPRV_Z:.+]] = fir.load %[[PRV_Z_DECL]]#0 : !fir.ref<i32>
-!CHECK:    %[[RES_Z:.+]] = arith.select %{{.+}}, %[[LPRV_Z]], %{{.+}} : i32
-!CHECK:    hlfir.assign %[[RES_Z]] to %[[PRV_Z_DECL]]#0 : i32, !fir.ref<i32>
-!CHECK:    omp.yield
-!CHECK:  }
-!CHECK: return
+!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:.+]] : !fir.ref<i32>) {
+!CHECK-NEXT:   omp.loop_nest {{.*}} {
+!CHECK:          %[[PRV_X_DECL:.+]]:2 = hlfir.declare %[[PRV_X]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:          %[[PRV_Y_DECL:.+]]:2 = hlfir.declare %[[PRV_Y]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:          %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:          %[[LPRV_X:.+]] = fir.load %[[PRV_X_DECL]]#0 : !fir.ref<i32>
+!CHECK:          %[[RES_X:.+]] = arith.addi %[[LPRV_X]], %{{.+}} : i32
+!CHECK:          hlfir.assign %[[RES_X]] to %[[PRV_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:          %[[LPRV_Y:.+]] = fir.load %[[PRV_Y_DECL]]#0 : !fir.ref<f32>
+!CHECK:          %[[RES_Y:.+]] = arith.addf %[[LPRV_Y]], %{{.+}} : f32
+!CHECK:          hlfir.assign %[[RES_Y]] to %[[PRV_Y_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK:          %[[LPRV_Z:.+]] = fir.load %[[PRV_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK:          %[[RES_Z:.+]] = arith.select %{{.+}}, %[[LPRV_Z]], %{{.+}} : i32
+!CHECK:          hlfir.assign %[[RES_Z]] to %[[PRV_Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
+!CHECK:      return
 subroutine multiple_reduction(v)
   implicit none
   integer, intent(in) :: v(:)
diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90
index c3d5e3e0cda593..1df67474d65e3b 100644
--- a/flang/test/Lower/OpenMP/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/wsloop-simd.f90
@@ -11,23 +11,26 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(simd: runtime)
-!CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-!CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-!CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK:     omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref<i32>
+!CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+!CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+!CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+!CHECK:      omp.wsloop schedule(runtime, simd) nowait {
+!CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+!CHECK:          fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref<i32>
 
   do i=1, 9
     print*, i
-!CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[STORE]]#0 : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
-!CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
+!CHECK:          %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
+!CHECK:          %[[LOAD:.*]] = fir.load %[[STORE]]#0 : !fir.ref<i32>
+!CHECK:          fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:          fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
-!CHECK:       omp.yield
-!CHECK:         }
-!CHECK:       omp.terminator
-!CHECK:     }
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
+!CHECK:      omp.terminator
+!CHECK:    }
 
 !$OMP END DO NOWAIT
 !$OMP END PARALLEL
diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90
index 7fe63a1fe607c2..bd6a0bade8c7ee 100644
--- a/flang/test/Lower/OpenMP/wsloop-unstructured.f90
+++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90
@@ -29,29 +29,32 @@ end subroutine sub
 ! CHECK-SAME:                      %[[VAL_2:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"},
 ! CHECK-SAME:                      %[[VAL_3:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) {
 ! [...]
-! CHECK:             omp.wsloop for  (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) {
+! CHECK:             omp.wsloop {
+! CHECK-NEXT:          omp.loop_nest (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) {
 ! [...]
-! CHECK:               cf.br ^bb1
-! CHECK:             ^bb1:
-! CHECK:               cf.br ^bb2
-! CHECK:             ^bb2:
+! CHECK:                 cf.br ^bb1
+! CHECK:               ^bb1:
+! CHECK:                 cf.br ^bb2
+! CHECK:               ^bb2:
 ! [...]
-! CHECK:               cf.br ^bb3
-! CHECK:             ^bb3:
+! CHECK:                 cf.br ^bb3
+! CHECK:               ^bb3:
 ! [...]
-! CHECK:               %[[VAL_63:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32
-! CHECK:               cf.cond_br %[[VAL_63]], ^bb4, ^bb7
-! CHECK:             ^bb4:
+! CHECK:                 %[[VAL_63:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32
+! CHECK:                 cf.cond_br %[[VAL_63]], ^bb4, ^bb7
+! CHECK:               ^bb4:
 ! [...]
-! CHECK:               %[[VAL_76:.*]] = arith.cmpf olt, %{{.*}}, %{{.*}} fastmath<contract> : f32
-! CHECK:               cf.cond_br %[[VAL_76]], ^bb5, ^bb6
-! CHECK:             ^bb5:
-! CHECK:               cf.br ^bb7
-! CHECK:             ^bb6:
+! CHECK:                 %[[VAL_76:.*]] = arith.cmpf olt, %{{.*}}, %{{.*}} fastmath<contract> : f32
+! CHECK:                 cf.cond_br %[[VAL_76]], ^bb5, ^bb6
+! CHECK:               ^bb5:
+! CHECK:                 cf.br ^bb7
+! CHECK:               ^bb6:
 ! [...]
-! CHECK:               cf.br ^bb3
-! CHECK:             ^bb7:
-! CHECK:               omp.yield
+! CHECK:                 cf.br ^bb3
+! CHECK:               ^bb7:
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
 ! CHECK:             }
 ! CHECK:             omp.terminator
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index b3758f1fdc00ff..4d83b332880365 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -14,26 +14,29 @@ program wsloop_variable
   integer(kind=16) :: i16, i16_lb
   real :: x
 
-!CHECK:  %[[TMP0:.*]] = arith.constant 1 : i32
-!CHECK:  %[[TMP1:.*]] = arith.constant 100 : i32
-!CHECK:  %[[TMP2:.*]] = fir.convert %[[TMP0]] : (i32) -> i64
-!CHECK:  %[[TMP3:.*]] = fir.convert %{{.*}} : (i8) -> i64
-!CHECK:  %[[TMP4:.*]] = fir.convert %{{.*}} : (i16) -> i64
-!CHECK:  %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
-!CHECK:  %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
-!CHECK:  %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:  omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
-!CHECK:    %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
-!CHECK:    fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref<i16>
-!CHECK:    fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref<i64>
-!CHECK:    %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]]#0 : !fir.ref<i16>
-!CHECK:    %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64
-!CHECK:    %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref<i64>
-!CHECK:    %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64
-!CHECK:    %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32
-!CHECK:    hlfir.assign %[[TMP11]] to %{{.*}} : f32, !fir.ref<f32>
-!CHECK:    omp.yield
-!CHECK:  }
+!CHECK:      %[[TMP0:.*]] = arith.constant 1 : i32
+!CHECK:      %[[TMP1:.*]] = arith.constant 100 : i32
+!CHECK:      %[[TMP2:.*]] = fir.convert %[[TMP0]] : (i32) -> i64
+!CHECK:      %[[TMP3:.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:      %[[TMP4:.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:      %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
+!CHECK:      %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
+!CHECK:      %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
+!CHECK:          fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref<i16>
+!CHECK:          fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref<i64>
+!CHECK:          %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]]#0 : !fir.ref<i16>
+!CHECK:          %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64
+!CHECK:          %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref<i64>
+!CHECK:          %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64
+!CHECK:          %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32
+!CHECK:          hlfir.assign %[[TMP11]] to %{{.*}} : f32, !fir.ref<f32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
 
   !$omp do collapse(2)
   do i2 = 1, i1_ub, i2_s
@@ -43,17 +46,20 @@ program wsloop_variable
   end do
   !$omp end do
 
-!CHECK:  %[[TMP12:.*]] = arith.constant 1 : i32
-!CHECK:  %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
-!CHECK:  %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
-!CHECK:  omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]])  {
-!CHECK:    %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
-!CHECK:    fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref<i16>
-!CHECK:    %[[LOAD3:.*]] = fir.load %[[STORE3]]#0 : !fir.ref<i16>
-!CHECK:    %[[TMP16:.*]] = fir.convert %[[LOAD3]] : (i16) -> f32
-!CHECK:    hlfir.assign %[[TMP16]] to %{{.*}} : f32, !fir.ref<f32>
-!CHECK:    omp.yield
-!CHECK:  }
+!CHECK:      %[[TMP12:.*]] = arith.constant 1 : i32
+!CHECK:      %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
+!CHECK:      %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
+!CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
+!CHECK:          fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref<i16>
+!CHECK:          %[[LOAD3:.*]] = fir.load %[[STORE3]]#0 : !fir.ref<i16>
+!CHECK:          %[[TMP16:.*]] = fir.convert %[[LOAD3]] : (i16) -> f32
+!CHECK:          hlfir.assign %[[TMP16]] to %{{.*}} : f32, !fir.ref<f32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
 
   !$omp do
   do i2 = 1, i1_ub, i8_s
@@ -61,17 +67,20 @@ program wsloop_variable
   end do
   !$omp end do
 
-!CHECK:  %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
-!CHECK:  %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
-!CHECK:  %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:  omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]])  {
-!CHECK:    %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
-!CHECK:    fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref<i128>
-!CHECK:    %[[LOAD4:.*]] = fir.load %[[STORE4]]#0 : !fir.ref<i128>
-!CHECK:    %[[TMP21:.*]] = fir.convert %[[LOAD4]] : (i128) -> f32
-!CHECK:    hlfir.assign %[[TMP21]] to %{{.*}} : f32, !fir.ref<f32>
-!CHECK:    omp.yield
-!CHECK:  }
+!CHECK:      %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:      %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:      %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:      omp.wsloop {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]])  {
+!CHECK:          %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
+!CHECK:          fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref<i128>
+!CHECK:          %[[LOAD4:.*]] = fir.load %[[STORE4]]#0 : !fir.ref<i128>
+!CHECK:          %[[TMP21:.*]] = fir.convert %[[LOAD4]] : (i128) -> f32
+!CHECK:          hlfir.assign %[[TMP21]] to %{{.*}} : f32, !fir.ref<f32>
+!CHECK:          omp.yield
+!CHECK:        }
+!CHECK:        omp.terminator
+!CHECK:      }
 
   !$omp do
   do i16 = i1_lb, i2_ub, i4_s
@@ -118,32 +127,35 @@ subroutine wsloop_variable_sub
 !CHECK:           %[[VAL_24:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i16>
 !CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_23]] : (i8) -> i32
 !CHECK:           %[[VAL_26:.*]] = fir.convert %[[VAL_24]] : (i16) -> i32
-!CHECK:           omp.wsloop   for  (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) {
-!CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16
-!CHECK:             fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref<i16>
-!CHECK:             %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i128>
-!CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i128) -> index
-!CHECK:             %[[VAL_31:.*]] = arith.constant 100 : i32
-!CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> index
-!CHECK:             %[[VAL_33:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> index
-!CHECK:             %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (index) -> i64
-!CHECK:             %[[VAL_36:.*]]:2 = fir.do_loop %[[VAL_37:.*]] = %[[VAL_30]] to %[[VAL_32]] step %[[VAL_34]] iter_args(%[[VAL_38:.*]] = %[[VAL_35]]) -> (index, i64) {
-!CHECK:               fir.store %[[VAL_38]] to %[[VAL_17]]#1 : !fir.ref<i64>
-!CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i16>
-!CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i64
-!CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<i64>
-!CHECK:               %[[VAL_42:.*]] = arith.addi %[[VAL_40]], %[[VAL_41]] : i64
-!CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i64) -> f32
-!CHECK:               hlfir.assign %[[VAL_43]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
-!CHECK:               %[[VAL_44:.*]] = arith.addi %[[VAL_37]], %[[VAL_34]] : index
-!CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_34]] : (index) -> i64
-!CHECK:               %[[VAL_46:.*]] = fir.load %[[VAL_17]]#1 : !fir.ref<i64>
-!CHECK:               %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_45]] : i64
-!CHECK:               fir.result %[[VAL_44]], %[[VAL_47]] : index, i64
+!CHECK:           omp.wsloop {
+!CHECK-NEXT:        omp.loop_nest (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) {
+!CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16
+!CHECK:               fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref<i16>
+!CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i128>
+!CHECK:               %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i128) -> index
+!CHECK:               %[[VAL_31:.*]] = arith.constant 100 : i32
+!CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> index
+!CHECK:               %[[VAL_33:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> index
+!CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (index) -> i64
+!CHECK:               %[[VAL_36:.*]]:2 = fir.do_loop %[[VAL_37:.*]] = %[[VAL_30]] to %[[VAL_32]] step %[[VAL_34]] iter_args(%[[VAL_38:.*]] = %[[VAL_35]]) -> (index, i64) {
+!CHECK:                 fir.store %[[VAL_38]] to %[[VAL_17]]#1 : !fir.ref<i64>
+!CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i16>
+!CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i64
+!CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<i64>
+!CHECK:                 %[[VAL_42:.*]] = arith.addi %[[VAL_40]], %[[VAL_41]] : i64
+!CHECK:                 %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i64) -> f32
+!CHECK:                 hlfir.assign %[[VAL_43]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+!CHECK:                 %[[VAL_44:.*]] = arith.addi %[[VAL_37]], %[[VAL_34]] : index
+!CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_34]] : (index) -> i64
+!CHECK:                 %[[VAL_46:.*]] = fir.load %[[VAL_17]]#1 : !fir.ref<i64>
+!CHECK:                 %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_45]] : i64
+!CHECK:                 fir.result %[[VAL_44]], %[[VAL_47]] : index, i64
+!CHECK:               }
+!CHECK:               fir.store %[[VAL_48:.*]]#1 to %[[VAL_17]]#1 : !fir.ref<i64>
+!CHECK:               omp.yield
 !CHECK:             }
-!CHECK:             fir.store %[[VAL_48:.*]]#1 to %[[VAL_17]]#1 : !fir.ref<i64>
-!CHECK:             omp.yield
+!CHECK:             omp.terminator
 !CHECK:           }
 
   !$omp do
@@ -160,16 +172,19 @@ subroutine wsloop_variable_sub
 !CHECK:           %[[VAL_50:.*]] = arith.constant 1 : i32
 !CHECK:           %[[VAL_51:.*]] = arith.constant 10 : i32
 !CHECK:           %[[VAL_52:.*]] = arith.constant 1 : i32
-!CHECK:           omp.wsloop   for  (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) {
-!CHECK:             %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8
-!CHECK:             fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref<i8>
-!CHECK:             %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i8>
-!CHECK:             %[[VAL_56:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i8>
-!CHECK:             %[[VAL_57:.*]] = arith.cmpi eq, %[[VAL_55]], %[[VAL_56]] : i8
-!CHECK:             fir.if %[[VAL_57]] {
-!CHECK:             } else {
+!CHECK:           omp.wsloop {
+!CHECK-NEXT:        omp.loop_nest (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) {
+!CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8
+!CHECK:               fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref<i8>
+!CHECK:               %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i8>
+!CHECK:               %[[VAL_56:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i8>
+!CHECK:               %[[VAL_57:.*]] = arith.cmpi eq, %[[VAL_55]], %[[VAL_56]] : i8
+!CHECK:               fir.if %[[VAL_57]] {
+!CHECK:               } else {
+!CHECK:               }
+!CHECK:               omp.yield
 !CHECK:             }
-!CHECK:             omp.yield
+!CHECK:             omp.terminator
 !CHECK:           }
   j1 = 5
   !$omp do
diff --git a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90
index 4068f715c3e189..da90cb7241597f 100644
--- a/flang/test/Lower/OpenMP/wsloop.f90
+++ b/flang/test/Lower/OpenMP/wsloop.f90
@@ -7,22 +7,24 @@ subroutine simple_loop
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:      %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
-  ! CHECK:             fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref<i32>
-  ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:          fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref<i32>
+  ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
+  ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:          omp.yield
+  ! CHECK:        omp.terminator
   !$OMP END DO
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL
 end subroutine
 
@@ -31,22 +33,24 @@ subroutine simple_loop_with_step
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_with_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 2 : i32
-  ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-  ! CHECK:       fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
-  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:      %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loop_with_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 2 : i32
+  ! CHECK:      omp.wsloop {
+  ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
+  ! CHECK:          fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
+  ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
   !$OMP DO
   do i=1, 9, 2
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:          omp.yield
+  ! CHECK:        omp.terminator
   !$OMP END DO
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL
 end subroutine
 
@@ -55,21 +59,23 @@ subroutine loop_with_schedule_nowait
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFloop_with_schedule_nowaitEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
-  ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
-  ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK:     omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:      %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:      %[[IV_DECL:.*]]:2 = hlfir.declare %[[ALLOCA_IV]] {uniq_name = "_QFloop_with_schedule_nowaitEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK:      %[[WS_LB:.*]] = arith.constant 1 : i32
+  ! CHECK:      %[[WS_UB:.*]] = arith.constant 9 : i32
+  ! CHECK:      %[[WS_STEP:.*]] = arith.constant 1 : i32
+  ! CHECK:      omp.wsloop schedule(runtime) nowait {
+  ! CHECK-NEXT:   omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
-  ! CHECK:       fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
-  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:          fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref<i32>
+  ! CHECK:          %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref<i32>
+  ! CHECK:          fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
-  ! CHECK:       omp.yield
+  ! CHECK:          omp.yield
+  ! CHECK:        omp.terminator
   !$OMP END DO NOWAIT
-  ! CHECK:       omp.terminator
+  ! CHECK:      omp.terminator
   !$OMP END PARALLEL
 end subroutine
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 6fb5296c2cc626..16f9675e9eed8f 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -583,29 +583,29 @@ def LoopNestOp : OpenMP_Op<"loop_nest", [SameVariadicOperandSize,
 //===----------------------------------------------------------------------===//
 
 def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
-                         AllTypesMatch<["lowerBound", "upperBound", "step"]>,
                          DeclareOpInterfaceMethods<LoopWrapperInterface>,
-                         RecursiveMemoryEffects, ReductionClauseInterface]> {
+                         RecursiveMemoryEffects, ReductionClauseInterface,
+                         SingleBlockImplicitTerminator<"TerminatorOp">]> {
   let summary = "worksharing-loop construct";
   let description = [{
     The worksharing-loop construct specifies that the iterations of the loop(s)
     will be executed in parallel by threads in the current context. These
     iterations are spread across threads that already exist in the enclosing
-    parallel region. The lower and upper bounds specify a half-open range: the
-    range includes the lower bound but does not include the upper bound. If the
-    `inclusive` attribute is specified then the upper bound is also included.
+    parallel region.
 
-    The body region can contain any number of blocks. The region is terminated
-    by "omp.yield" instruction without operands.
+    The body region can contain a single block which must contain a single
+    operation and a terminator. The operation must be another compatible loop
+    wrapper or an `omp.loop_nest`.
 
     ```
-    omp.wsloop <clauses>
-    for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
-      %a = load %arrA[%i1, %i2] : memref<?x?xf32>
-      %b = load %arrB[%i1, %i2] : memref<?x?xf32>
-      %sum = arith.addf %a, %b : f32
-      store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
-      omp.yield
+    omp.wsloop <clauses> {
+      omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+        %a = load %arrA[%i1, %i2] : memref<?x?xf32>
+        %b = load %arrB[%i1, %i2] : memref<?x?xf32>
+        %sum = arith.addf %a, %b : f32
+        store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
+        omp.yield
+      }
     }
     ```
 
@@ -629,10 +629,6 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
     The optional `schedule_chunk_var` associated with this determines further
     controls this distribution.
 
-    Collapsed loops are represented by the worksharing-loop having a list of
-    indices, bounds and steps where the size of the list is equal to the
-    collapse value.
-
     The `nowait` attribute, when present, signifies that there should be no
     implicit barrier at the end of the loop.
 
@@ -648,10 +644,7 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
     passed by reference.
   }];
 
-  let arguments = (ins Variadic<IntLikeType>:$lowerBound,
-             Variadic<IntLikeType>:$upperBound,
-             Variadic<IntLikeType>:$step,
-             Variadic<AnyType>:$linear_vars,
+  let arguments = (ins Variadic<AnyType>:$linear_vars,
              Variadic<I32>:$linear_step_vars,
              Variadic<OpenMP_PointerLikeType>:$reduction_vars,
              OptionalAttr<SymbolRefArrayAttr>:$reductions,
@@ -662,21 +655,15 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
              UnitAttr:$nowait,
              UnitAttr:$byref,
              ConfinedAttr<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
-             OptionalAttr<OrderKindAttr>:$order_val,
-             UnitAttr:$inclusive);
+             OptionalAttr<OrderKindAttr>:$order_val);
 
   let builders = [
-    OpBuilder<(ins "ValueRange":$lowerBound, "ValueRange":$upperBound,
-               "ValueRange":$step,
-               CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
+    OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>
   ];
 
   let regions = (region AnyRegion:$region);
 
   let extraClassDeclaration = [{
-    /// Returns the number of loops in the worksharing-loop nest.
-    unsigned getNumLoops() { return getLowerBound().size(); }
-
     /// Returns the number of reduction variables.
     unsigned getNumReductionVars() { return getReductionVars().size(); }
   }];
@@ -693,9 +680,8 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
           |`byref` $byref
           |`ordered` `(` $ordered_val `)`
           |`order` `(` custom<ClauseAttr>($order_val) `)`
-    ) custom<Wsloop>($region, $lowerBound, $upperBound, $step, type($step),
-                     $reduction_vars, type($reduction_vars), $reductions,
-                     $inclusive) attr-dict
+    ) custom<Wsloop>($region, $reduction_vars, type($reduction_vars),
+                     $reductions) attr-dict
   }];
   let hasVerifier = 1;
 }
@@ -782,8 +768,8 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,
 
 def YieldOp : OpenMP_Op<"yield",
     [Pure, ReturnLike, Terminator,
-     ParentOneOf<["LoopNestOp", "WsloopOp", "DeclareReductionOp",
-     "AtomicUpdateOp", "PrivateClauseOp"]>]> {
+     ParentOneOf<["AtomicUpdateOp", "DeclareReductionOp", "LoopNestOp",
+                  "PrivateClauseOp"]>]> {
   let summary = "loop yield and termination operation";
   let description = [{
     "omp.yield" yields SSA values from the OpenMP dialect op region and
diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index 7f91367ad427a2..f0b8d6c5309357 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -461,18 +461,51 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
       // Replace the loop.
       {
         OpBuilder::InsertionGuard allocaGuard(rewriter);
-        auto loop = rewriter.create<omp::WsloopOp>(
+        // Create worksharing loop wrapper.
+        auto wsloopOp = rewriter.create<omp::WsloopOp>(parallelOp.getLoc());
+        if (!reductionVariables.empty()) {
+          wsloopOp.setReductionsAttr(
+              ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
+          wsloopOp.getReductionVarsMutable().append(reductionVariables);
+        }
+        rewriter.create<omp::TerminatorOp>(loc); // omp.parallel terminator.
+
+        // The wrapper's entry block arguments will define the reduction
+        // variables.
+        llvm::SmallVector<mlir::Type> reductionTypes;
+        reductionTypes.reserve(reductionVariables.size());
+        llvm::transform(reductionVariables, std::back_inserter(reductionTypes),
+                        [](mlir::Value v) { return v.getType(); });
+        rewriter.createBlock(
+            &wsloopOp.getRegion(), {}, reductionTypes,
+            llvm::SmallVector<mlir::Location>(reductionVariables.size(),
+                                              parallelOp.getLoc()));
+
+        rewriter.setInsertionPoint(
+            rewriter.create<omp::TerminatorOp>(parallelOp.getLoc()));
+
+        // Create loop nest and populate region with contents of scf.parallel.
+        auto loopOp = rewriter.create<omp::LoopNestOp>(
             parallelOp.getLoc(), parallelOp.getLowerBound(),
             parallelOp.getUpperBound(), parallelOp.getStep());
-        rewriter.create<omp::TerminatorOp>(loc);
 
-        rewriter.inlineRegionBefore(parallelOp.getRegion(), loop.getRegion(),
-                                    loop.getRegion().begin());
+        rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
+                                    loopOp.getRegion().begin());
+
+        // Remove reduction-related block arguments from omp.loop_nest and
+        // redirect uses to the corresponding omp.wsloop block argument.
+        mlir::Block &loopOpEntryBlock = loopOp.getRegion().front();
+        unsigned numLoops = parallelOp.getNumLoops();
+        rewriter.replaceAllUsesWith(
+            loopOpEntryBlock.getArguments().drop_front(numLoops),
+            wsloopOp.getRegion().getArguments());
+        loopOpEntryBlock.eraseArguments(
+            numLoops, loopOpEntryBlock.getNumArguments() - numLoops);
 
-        Block *ops = rewriter.splitBlock(&*loop.getRegion().begin(),
-                                         loop.getRegion().begin()->begin());
+        Block *ops = rewriter.splitBlock(&*loopOp.getRegion().begin(),
+                                         loopOp.getRegion().begin()->begin());
 
-        rewriter.setInsertionPointToStart(&*loop.getRegion().begin());
+        rewriter.setInsertionPointToStart(&*loopOp.getRegion().begin());
 
         auto scope = rewriter.create<memref::AllocaScopeOp>(parallelOp.getLoc(),
                                                             TypeRange());
@@ -481,11 +514,6 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
         rewriter.mergeBlocks(ops, scopeBlock);
         rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin());
         rewriter.create<memref::AllocaScopeReturnOp>(loc, ValueRange());
-        if (!reductionVariables.empty()) {
-          loop.setReductionsAttr(
-              ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
-          loop.getReductionVarsMutable().append(reductionVariables);
-        }
       }
     }
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 8747188cbf1125..e48f91a197d7af 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1348,83 +1348,31 @@ LogicalResult SingleOp::verify() {
 // WsloopOp
 //===----------------------------------------------------------------------===//
 
-/// loop-control ::= `(` ssa-id-list `)` `:` type `=`  loop-bounds
-/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
-/// steps := `step` `(`ssa-id-list`)`
 ParseResult
 parseWsloop(OpAsmParser &parser, Region &region,
-            SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
-            SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
-            SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
-            SmallVectorImpl<Type> &loopVarTypes,
             SmallVectorImpl<OpAsmParser::UnresolvedOperand> &reductionOperands,
-            SmallVectorImpl<Type> &reductionTypes, ArrayAttr &reductionSymbols,
-            UnitAttr &inclusive) {
-
+            SmallVectorImpl<Type> &reductionTypes,
+            ArrayAttr &reductionSymbols) {
   // Parse an optional reduction clause
   llvm::SmallVector<OpAsmParser::Argument> privates;
-  bool hasReduction = succeeded(parser.parseOptionalKeyword("reduction")) &&
-                      succeeded(parseClauseWithRegionArgs(
-                          parser, region, reductionOperands, reductionTypes,
-                          reductionSymbols, privates));
-
-  if (parser.parseKeyword("for"))
-    return failure();
-
-  // Parse an opening `(` followed by induction variables followed by `)`
-  SmallVector<OpAsmParser::Argument> ivs;
-  Type loopVarType;
-  if (parser.parseArgumentList(ivs, OpAsmParser::Delimiter::Paren) ||
-      parser.parseColonType(loopVarType) ||
-      // Parse loop bounds.
-      parser.parseEqual() ||
-      parser.parseOperandList(lowerBound, ivs.size(),
-                              OpAsmParser::Delimiter::Paren) ||
-      parser.parseKeyword("to") ||
-      parser.parseOperandList(upperBound, ivs.size(),
-                              OpAsmParser::Delimiter::Paren))
-    return failure();
-
-  if (succeeded(parser.parseOptionalKeyword("inclusive")))
-    inclusive = UnitAttr::get(parser.getBuilder().getContext());
-
-  // Parse step values.
-  if (parser.parseKeyword("step") ||
-      parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren))
-    return failure();
-
-  // Now parse the body.
-  loopVarTypes = SmallVector<Type>(ivs.size(), loopVarType);
-  for (auto &iv : ivs)
-    iv.type = loopVarType;
-
-  SmallVector<OpAsmParser::Argument> regionArgs{ivs};
-  if (hasReduction)
-    llvm::copy(privates, std::back_inserter(regionArgs));
-
-  return parser.parseRegion(region, regionArgs);
+  if (succeeded(parser.parseOptionalKeyword("reduction"))) {
+    if (failed(parseClauseWithRegionArgs(parser, region, reductionOperands,
+                                         reductionTypes, reductionSymbols,
+                                         privates)))
+      return failure();
+  }
+  return parser.parseRegion(region, privates);
 }
 
 void printWsloop(OpAsmPrinter &p, Operation *op, Region &region,
-                 ValueRange lowerBound, ValueRange upperBound, ValueRange steps,
-                 TypeRange loopVarTypes, ValueRange reductionOperands,
-                 TypeRange reductionTypes, ArrayAttr reductionSymbols,
-                 UnitAttr inclusive) {
+                 ValueRange reductionOperands, TypeRange reductionTypes,
+                 ArrayAttr reductionSymbols) {
   if (reductionSymbols) {
-    auto reductionArgs =
-        region.front().getArguments().drop_front(loopVarTypes.size());
+    auto reductionArgs = region.front().getArguments();
     printClauseWithRegionArgs(p, op, reductionArgs, "reduction",
                               reductionOperands, reductionTypes,
                               reductionSymbols);
   }
-
-  p << " for ";
-  auto args = region.front().getArguments().drop_back(reductionOperands.size());
-  p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
-    << ") to (" << upperBound << ") ";
-  if (inclusive)
-    p << "inclusive ";
-  p << "step (" << steps << ") ";
   p.printRegion(region, /*printEntryBlockArgs=*/false);
 }
 
@@ -1725,6 +1673,9 @@ void LoopNestOp::print(OpAsmPrinter &p) {
 }
 
 LogicalResult LoopNestOp::verify() {
+  if (getLowerBound().empty())
+    return emitOpError() << "must represent at least one loop";
+
   if (getLowerBound().size() != getIVs().size())
     return emitOpError() << "number of range arguments and IVs do not match";
 
@@ -1760,20 +1711,27 @@ void LoopNestOp::gatherWrappers(
 //===----------------------------------------------------------------------===//
 
 void WsloopOp::build(OpBuilder &builder, OperationState &state,
-                     ValueRange lowerBound, ValueRange upperBound,
-                     ValueRange step, ArrayRef<NamedAttribute> attributes) {
-  build(builder, state, lowerBound, upperBound, step,
-        /*linear_vars=*/ValueRange(),
+                     ArrayRef<NamedAttribute> attributes) {
+  build(builder, state, /*linear_vars=*/ValueRange(),
         /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
         /*reductions=*/nullptr, /*schedule_val=*/nullptr,
         /*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
         /*simd_modifier=*/false, /*nowait=*/false, /*byref=*/false,
-        /*ordered_val=*/nullptr,
-        /*order_val=*/nullptr, /*inclusive=*/false);
+        /*ordered_val=*/nullptr, /*order_val=*/nullptr);
   state.addAttributes(attributes);
 }
 
 LogicalResult WsloopOp::verify() {
+  if (!isWrapper())
+    return emitOpError() << "must be a loop wrapper";
+
+  if (LoopWrapperInterface nested = getNestedWrapper()) {
+    // Check for the allowed leaf constructs that may appear in a composite
+    // construct directly after DO/FOR.
+    if (!isa<SimdOp>(nested))
+      return emitError() << "only supported nested wrapper is 'omp.simd'";
+  }
+
   return verifyReductionVarList(*this, getReductions(), getReductionVars());
 }
 
@@ -1824,9 +1782,10 @@ LogicalResult OrderedRegionOp::verify() {
   if (getSimd())
     return failure();
 
-  if (auto container = (*this)->getParentOfType<WsloopOp>()) {
-    if (!container.getOrderedValAttr() ||
-        container.getOrderedValAttr().getInt() != 0)
+  if (auto loopOp = dyn_cast<LoopNestOp>((*this)->getParentOp())) {
+    auto wsloopOp = llvm::dyn_cast_if_present<WsloopOp>(loopOp->getParentOp());
+    if (!wsloopOp || !wsloopOp.getOrderedValAttr() ||
+        wsloopOp.getOrderedValAttr().getInt() != 0)
       return emitOpError() << "ordered region must be closely nested inside "
                            << "a worksharing-loop region with an ordered "
                            << "clause without parameter present";
@@ -1967,19 +1926,22 @@ LogicalResult CancelOp::verify() {
                          << "inside a parallel region";
   }
   if (cct == ClauseCancellationConstructType::Loop) {
-    if (!isa<WsloopOp>(parentOp)) {
-      return emitOpError() << "cancel loop must appear "
-                           << "inside a worksharing-loop region";
+    auto loopOp = dyn_cast<LoopNestOp>(parentOp);
+    auto wsloopOp = llvm::dyn_cast_if_present<WsloopOp>(
+        loopOp ? loopOp->getParentOp() : nullptr);
+
+    if (!wsloopOp) {
+      return emitOpError()
+             << "cancel loop must appear inside a worksharing-loop region";
     }
-    if (cast<WsloopOp>(parentOp).getNowaitAttr()) {
-      return emitError() << "A worksharing construct that is canceled "
-                         << "must not have a nowait clause";
+    if (wsloopOp.getNowaitAttr()) {
+      return emitError() << "A worksharing construct that is canceled must not "
+                            "have a `nowait` clause";
     }
-    if (cast<WsloopOp>(parentOp).getOrderedValAttr()) {
-      return emitError() << "A worksharing construct that is canceled "
-                         << "must not have an ordered clause";
+    if (wsloopOp.getOrderedValAttr()) {
+      return emitError() << "A worksharing construct that is canceled must not "
+                            "have an `ordered` clause";
     }
-
   } else if (cct == ClauseCancellationConstructType::Sections) {
     if (!(isa<SectionsOp>(parentOp) || isa<SectionOp>(parentOp))) {
       return emitOpError() << "cancel sections must appear "
@@ -1988,7 +1950,7 @@ LogicalResult CancelOp::verify() {
     if (isa_and_nonnull<SectionsOp>(parentOp->getParentOp()) &&
         cast<SectionsOp>(parentOp->getParentOp()).getNowaitAttr()) {
       return emitError() << "A sections construct that is canceled "
-                         << "must not have a nowait clause";
+                         << "must not have a `nowait` clause";
     }
   }
   // TODO : Add more when we support taskgroup.
@@ -2013,7 +1975,7 @@ LogicalResult CancellationPointOp::verify() {
                          << "inside a parallel region";
   }
   if ((cct == ClauseCancellationConstructType::Loop) &&
-      !isa<WsloopOp>(parentOp)) {
+      (!isa<LoopNestOp>(parentOp) || !isa<WsloopOp>(parentOp->getParentOp()))) {
     return emitOpError() << "cancellation point loop must appear "
                          << "inside a worksharing-loop region";
   }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 858b033d39cc72..2956dcc6730337 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -862,35 +862,33 @@ static void collectReductionInfo(
 static LogicalResult
 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
-  auto loop = cast<omp::WsloopOp>(opInst);
-  const bool isByRef = loop.getByref();
-  // TODO: this should be in the op verifier instead.
-  if (loop.getLowerBound().empty())
-    return failure();
+  auto wsloopOp = cast<omp::WsloopOp>(opInst);
+  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
+  const bool isByRef = wsloopOp.getByref();
 
   // Static is the default.
   auto schedule =
-      loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
+      wsloopOp.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
 
   // Find the loop configuration.
-  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]);
+  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[0]);
   llvm::Type *ivType = step->getType();
   llvm::Value *chunk = nullptr;
-  if (loop.getScheduleChunkVar()) {
+  if (wsloopOp.getScheduleChunkVar()) {
     llvm::Value *chunkVar =
-        moduleTranslation.lookupValue(loop.getScheduleChunkVar());
+        moduleTranslation.lookupValue(wsloopOp.getScheduleChunkVar());
     chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
   }
 
   SmallVector<omp::DeclareReductionOp> reductionDecls;
-  collectReductionDecls(loop, reductionDecls);
+  collectReductionDecls(wsloopOp, reductionDecls);
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
   SmallVector<llvm::Value *> privateReductionVariables;
   DenseMap<Value, llvm::Value *> reductionVariableMap;
   if (!isByRef) {
-    allocByValReductionVars(loop, builder, moduleTranslation, allocaIP,
+    allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP,
                             reductionDecls, privateReductionVariables,
                             reductionVariableMap);
   }
@@ -898,9 +896,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   // Before the loop, store the initial values of reductions into reduction
   // variables. Although this could be done after allocas, we don't want to mess
   // up with the alloca insertion point.
-  MutableArrayRef<BlockArgument> reductionArgs =
-      loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
-  for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
+  ArrayRef<BlockArgument> reductionArgs = wsloopOp.getRegion().getArguments();
+  for (unsigned i = 0; i < wsloopOp.getNumReductionVars(); ++i) {
     SmallVector<llvm::Value *> phis;
     if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
                                        "omp.reduction.neutral", builder,
@@ -919,7 +916,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
 
       privateReductionVariables.push_back(var);
       moduleTranslation.mapValue(reductionArgs[i], phis[0]);
-      reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
+      reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]);
     } else {
       // for by-ref case the store is inside of the reduction region
       builder.CreateStore(phis[0], privateReductionVariables[i]);
@@ -945,33 +942,34 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
     // Make sure further conversions know about the induction variable.
     moduleTranslation.mapValue(
-        loop.getRegion().front().getArgument(loopInfos.size()), iv);
+        loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
 
     // Capture the body insertion point for use in nested loops. BodyIP of the
     // CanonicalLoopInfo always points to the beginning of the entry block of
     // the body.
     bodyInsertPoints.push_back(ip);
 
-    if (loopInfos.size() != loop.getNumLoops() - 1)
+    if (loopInfos.size() != loopOp.getNumLoops() - 1)
       return;
 
     // Convert the body of the loop.
     builder.restoreIP(ip);
-    convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
+    convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
                         moduleTranslation, bodyGenStatus);
   };
 
   // Delegate actual loop construction to the OpenMP IRBuilder.
-  // TODO: this currently assumes Wsloop is semantically similar to SCF loop,
-  // i.e. it has a positive step, uses signed integer semantics. Reconsider
-  // this code when Wsloop clearly supports more cases.
+  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
+  // loop, i.e. it has a positive step, uses signed integer semantics.
+  // Reconsider this code when the nested loop operation clearly supports more
+  // cases.
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
+  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
     llvm::Value *lowerBound =
-        moduleTranslation.lookupValue(loop.getLowerBound()[i]);
+        moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
     llvm::Value *upperBound =
-        moduleTranslation.lookupValue(loop.getUpperBound()[i]);
-    llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
+        moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
+    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
 
     // Make sure loop trip count are emitted in the preheader of the outermost
     // loop at the latest so that they are all available for the new collapsed
@@ -984,7 +982,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
     }
     loopInfos.push_back(ompBuilder->createCanonicalLoop(
         loc, bodyGen, lowerBound, upperBound, step,
-        /*IsSigned=*/true, loop.getInclusive(), computeIP));
+        /*IsSigned=*/true, loopOp.getInclusive(), computeIP));
 
     if (failed(bodyGenStatus))
       return failure();
@@ -999,13 +997,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
 
   // TODO: Handle doacross loops when the ordered clause has a parameter.
-  bool isOrdered = loop.getOrderedVal().has_value();
+  bool isOrdered = wsloopOp.getOrderedVal().has_value();
   std::optional<omp::ScheduleModifier> scheduleModifier =
-      loop.getScheduleModifier();
-  bool isSimd = loop.getSimdModifier();
+      wsloopOp.getScheduleModifier();
+  bool isSimd = wsloopOp.getSimdModifier();
 
   ompBuilder->applyWorkshareLoop(
-      ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
+      ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
       convertToScheduleKind(schedule), chunk, isSimd,
       scheduleModifier == omp::ScheduleModifier::monotonic,
       scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
@@ -1017,7 +1015,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   builder.restoreIP(afterIP);
 
   // Process the reductions if required.
-  if (loop.getNumReductionVars() == 0)
+  if (wsloopOp.getNumReductionVars() == 0)
     return success();
 
   // Create the reduction generators. We need to own them here because
@@ -1025,7 +1023,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   SmallVector<OwningReductionGen> owningReductionGens;
   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
-  collectReductionInfo(loop, builder, moduleTranslation, reductionDecls,
+  collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls,
                        owningReductionGens, owningAtomicReductionGens,
                        privateReductionVariables, reductionInfos);
 
@@ -1036,9 +1034,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   builder.SetInsertPoint(tempTerminator);
   llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
       ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
-                                   loop.getNowait(), isByRef);
+                                   wsloopOp.getNowait(), isByRef);
   if (!contInsertPoint.getBlock())
-    return loop->emitOpError() << "failed to convert reductions";
+    return wsloopOp->emitOpError() << "failed to convert reductions";
   auto nextInsertionPoint =
       ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
   tempTerminator->eraseFromParent();
diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 38a8fb8c3e2137..81ff8477ffd7b7 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -99,8 +99,11 @@ void testOmpCreation(void) {
 "    %1 = arith.constant 1 : i32                                                \n"
 "    %2 = arith.constant 2 : i32                                                \n"
 "    omp.parallel {                                                             \n"
-"      omp.wsloop for (%3) : i32 = (%0) to (%2) step (%1) {                     \n"
-"        omp.yield                                                              \n"
+"      omp.wsloop {                                                             \n"
+"        omp.loop_nest (%3) : i32 = (%0) to (%2) step (%1) {                    \n"
+"          omp.yield                                                            \n"
+"        }                                                                      \n"
+"        omp.terminator                                                         \n"
 "      }                                                                        \n"
 "      omp.terminator                                                           \n"
 "    }                                                                          \n"
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 9f45d139b81f21..3aeb9e70522d52 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -71,15 +71,18 @@ func.func @branch_loop() {
 func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: omp.parallel
   omp.parallel {
-    // CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
-    "omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({
-    ^bb0(%arg6: index, %arg7: index):
-      // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
-      // CHECK-DAG: %[[CAST_ARG7:.*]] = builtin.unrealized_conversion_cast %[[ARG7]] : i64 to index
-      // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> ()
-      "test.payload"(%arg6, %arg7) : (index, index) -> ()
-      omp.yield
-    }) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
+    // CHECK: omp.wsloop {
+    "omp.wsloop"() ({
+      // CHECK: omp.loop_nest (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
+      omp.loop_nest (%arg6, %arg7) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+        // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
+        // CHECK-DAG: %[[CAST_ARG7:.*]] = builtin.unrealized_conversion_cast %[[ARG7]] : i64 to index
+        // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> ()
+        "test.payload"(%arg6, %arg7) : (index, index) -> ()
+        omp.yield
+      }
+      omp.terminator
+    }) : () -> ()
     omp.terminator
   }
   return
@@ -323,12 +326,14 @@ llvm.func @_QPsb() {
 // CHECK-LABEL:  @_QPsimple_reduction
 // CHECK:    %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr
 // CHECK:    omp.parallel
-// CHECK:      omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for
-// CHECK:        %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
-// CHECK:        %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32
-// CHECK:        %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32
-// CHECK:        llvm.store %[[ZEXT]], %[[PRV]] : i32, !llvm.ptr
-// CHECK:        omp.yield
+// CHECK:      omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr)
+// CHECK-NEXT:   omp.loop_nest {{.*}}{
+// CHECK:          %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
+// CHECK:          %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32
+// CHECK:          %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32
+// CHECK:          llvm.store %[[ZEXT]], %[[PRV]] : i32, !llvm.ptr
+// CHECK:          omp.yield
+// CHECK:        omp.terminator
 // CHECK:      omp.terminator
 // CHECK:    llvm.return
 
@@ -354,20 +359,23 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) {
   %4 = llvm.alloca %3 x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr
   %5 = llvm.zext %2 : i1 to i32
   llvm.store %5, %4 : i32, !llvm.ptr
-  omp.parallel   {
+  omp.parallel {
     %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array<i32: 0, 0>, pinned} : (i64) -> !llvm.ptr
-    omp.wsloop   reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for  (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
-      llvm.store %arg1, %6 : i32, !llvm.ptr
-      %7 = llvm.load %6 : !llvm.ptr -> i32
-      %8 = llvm.sext %7 : i32 to i64
-      %9 = llvm.sub %8, %3  : i64
-      %10 = llvm.getelementptr %arg0[0, %9] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<100 x i32>
-      %11 = llvm.load %10 : !llvm.ptr -> i32
-      %12 = llvm.load %prv : !llvm.ptr -> i32
-      %13 = llvm.icmp "eq" %11, %12 : i32
-      %14 = llvm.zext %13 : i1 to i32
-      llvm.store %14, %prv : i32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) {
+      omp.loop_nest (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
+        llvm.store %arg1, %6 : i32, !llvm.ptr
+        %7 = llvm.load %6 : !llvm.ptr -> i32
+        %8 = llvm.sext %7 : i32 to i64
+        %9 = llvm.sub %8, %3  : i64
+        %10 = llvm.getelementptr %arg0[0, %9] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<100 x i32>
+        %11 = llvm.load %10 : !llvm.ptr -> i32
+        %12 = llvm.load %prv : !llvm.ptr -> i32
+        %13 = llvm.icmp "eq" %11, %12 : i32
+        %14 = llvm.zext %13 : i1 to i32
+        llvm.store %14, %prv : i32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
diff --git a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir
index 3b6c145d62f1a8..7f31872018297b 100644
--- a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir
@@ -28,6 +28,7 @@ func.func @reduction1(%arg0 : index, %arg1 : index, %arg2 : index,
   // CHECK: omp.parallel
   // CHECK: omp.wsloop
   // CHECK-SAME: reduction(@[[$REDF]] %[[BUF]] -> %[[PVT_BUF:[a-z0-9]+]]
+  // CHECK: omp.loop_nest
   // CHECK: memref.alloca_scope
   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                             step (%arg4, %step) init (%zero) -> (f32) {
@@ -43,6 +44,7 @@ func.func @reduction1(%arg0 : index, %arg1 : index, %arg2 : index,
     }
     // CHECK: omp.yield
   }
+  // CHECK:   omp.terminator
   // CHECK: omp.terminator
   // CHECK: llvm.load %[[BUF]]
   return
@@ -208,6 +210,7 @@ func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index,
   // CHECK: omp.wsloop
   // CHECK-SAME: reduction(@[[$REDF1]] %[[BUF1]] -> %[[PVT_BUF1:[a-z0-9]+]]
   // CHECK-SAME:           @[[$REDF2]] %[[BUF2]] -> %[[PVT_BUF2:[a-z0-9]+]]
+  // CHECK: omp.loop_nest
   // CHECK: memref.alloca_scope
   %res:2 = scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                         step (%arg4, %step) init (%zero, %ione) -> (f32, i64) {
@@ -236,6 +239,7 @@ func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index,
     }
     // CHECK: omp.yield
   }
+  // CHECK:   omp.terminator
   // CHECK: omp.terminator
   // CHECK: %[[RES1:.*]] = llvm.load %[[BUF1]] : !llvm.ptr -> f32
   // CHECK: %[[RES2:.*]] = llvm.load %[[BUF2]] : !llvm.ptr -> i64
diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
index acd2690c56e2e6..b2f19d294cb5fe 100644
--- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
@@ -2,10 +2,11 @@
 
 // CHECK-LABEL: @parallel
 func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
-          %arg3: index, %arg4: index, %arg5: index) {
+                    %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
   // CHECK: memref.alloca_scope
   scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
     // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> ()
@@ -13,6 +14,8 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
     // CHECK:   omp.yield
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
   return
@@ -23,20 +26,26 @@ func.func @nested_loops(%arg0: index, %arg1: index, %arg2: index,
                    %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
-    // CHECK: memref.alloca_scope
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
+  // CHECK: memref.alloca_scope
   scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) {
     // CHECK: omp.parallel
-    // CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
+    // CHECK: omp.wsloop {
+    // CHECK: omp.loop_nest (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
     // CHECK: memref.alloca_scope
     scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) {
       // CHECK: "test.payload"(%[[LVAR_OUT1]], %[[LVAR_IN1]]) : (index, index) -> ()
       "test.payload"(%i, %j) : (index, index) -> ()
       // CHECK: }
     }
-    // CHECK:   omp.yield
+    // CHECK:     omp.yield
+    // CHECK:   }
+    // CHECK:   omp.terminator
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
   return
@@ -47,7 +56,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
                      %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
   // CHECK: memref.alloca_scope
   scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) {
     // CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> ()
@@ -55,12 +65,15 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
     // CHECK:   omp.yield
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
 
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
   // CHECK: memref.alloca_scope
   scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) {
     // CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> ()
@@ -68,6 +81,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
     // CHECK:   omp.yield
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
   return
diff --git a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir
index 37720e98d92a9e..b1b06740f19442 100644
--- a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir
+++ b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir
@@ -32,14 +32,17 @@ llvm.func @repeated_successor_no_args(%arg0: i1) {
 
 // CHECK: @repeated_successor_openmp
 llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
-  omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
-    // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}})
-    llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
-  // CHECK: ^[[BB1]]
-  ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
-    omp.yield
-  // CHECK: ^[[BB2]](%[[ARG:.*]]: i64):
-  // CHECK:  llvm.br ^[[BB1]](%[[ARG]] : i64)
+  omp.wsloop {
+    omp.loop_nest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
+      // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}})
+      llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
+    // CHECK: ^[[BB1]]
+    ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
+      omp.yield
+    // CHECK: ^[[BB2]](%[[ARG:.*]]: i64):
+    // CHECK:  llvm.br ^[[BB1]](%[[ARG]] : i64)
+    }
+    omp.terminator
   }
   llvm.return
 }
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 8d17d880548970..efed5bbd4abcac 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -97,52 +97,92 @@ func.func @invalid_parent(%lb : index, %ub : index, %step : index) {
 // -----
 
 func.func @invalid_wrapper(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.parallel {
     %0 = arith.constant 0 : i32
     // expected-error at +1 {{op expects parent op to be a valid loop wrapper}}
     omp.loop_nest (%iv2) : index = (%lb) to (%ub) step (%step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
+  }
+}
+
+// -----
+
+func.func @no_loops(%lb : index, %ub : index, %step : index) {
+  omp.wsloop {
+    // expected-error at +1 {{op must represent at least one loop}}
+    "omp.loop_nest" () ({
+    ^bb0():
+      omp.yield
+    }) : () -> ()
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @type_mismatch(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // expected-error at +1 {{range argument type does not match corresponding IV type}}
     "omp.loop_nest" (%lb, %ub, %step) ({
     ^bb0(%iv2: i32):
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // expected-error at +1 {{number of range arguments and IVs do not match}}
     "omp.loop_nest" (%lb, %ub, %step) ({
     ^bb0(%iv1 : index, %iv2 : index):
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
+  }
+}
+
+// -----
+
+func.func @no_wrapper(%lb : index, %ub : index, %step : index) {
+  // expected-error @below {{op must be a loop wrapper}}
+  omp.wsloop {
+    %0 = arith.constant 0 : i32
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }
+}
+
+// -----
+
+func.func @invalid_nested_wrapper(%lb : index, %ub : index, %step : index) {
+  // expected-error @below {{only supported nested wrapper is 'omp.simd'}}
+  omp.wsloop {
+    omp.distribute {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
+      omp.terminator
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop nowait inclusive
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop nowait inclusive {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
@@ -150,39 +190,47 @@ func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
 
 func.func @order_value(%lb : index, %ub : index, %step : index) {
   // expected-error @below {{invalid clause value: 'default'}}
-  omp.wsloop order(default)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop order(default) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop if(%bool_var: i1)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop if(%bool_var: i1) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop num_threads(%int_var: i32)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop num_threads(%int_var: i32) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @proc_bind_not_allowed(%lb : index, %ub : index, %step : index) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop proc_bind(close)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop proc_bind(close) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
@@ -190,9 +238,11 @@ func.func @proc_bind_not_allowed(%lb : index, %ub : index, %step : index) {
 
 llvm.func @test_omp_wsloop_dynamic_bad_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{unknown modifier type: ginandtonic}}
-  omp.wsloop schedule(dynamic, ginandtonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, ginandtonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -201,9 +251,11 @@ llvm.func @test_omp_wsloop_dynamic_bad_modifier(%lb : i64, %ub : i64, %step : i6
 
 llvm.func @test_omp_wsloop_dynamic_many_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{unexpected modifier(s)}}
-  omp.wsloop schedule(dynamic, monotonic, monotonic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, monotonic, monotonic, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -212,9 +264,11 @@ llvm.func @test_omp_wsloop_dynamic_many_modifier(%lb : i64, %ub : i64, %step : i
 
 llvm.func @test_omp_wsloop_dynamic_wrong_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{incorrect modifier order}}
-  omp.wsloop schedule(dynamic, simd, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, simd, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -223,9 +277,11 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier(%lb : i64, %ub : i64, %step :
 
 llvm.func @test_omp_wsloop_dynamic_wrong_modifier2(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{incorrect modifier order}}
-  omp.wsloop schedule(dynamic, monotonic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, monotonic, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -234,9 +290,11 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier2(%lb : i64, %ub : i64, %step :
 
 llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{incorrect modifier order}}
-  omp.wsloop schedule(dynamic, simd, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, simd, simd) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -516,11 +574,13 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
   %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
 
   // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}}
-  omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %2 = arith.constant 2.0 : f32
-    omp.reduction %2, %1 : f32, !llvm.ptr
-    omp.yield
+  omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %2 = arith.constant 2.0 : f32
+      omp.reduction %2, %1 : f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -544,11 +604,13 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
 
   // expected-error @below {{accumulator variable used more than once}}
-  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %2 = arith.constant 2.0 : f32
-    omp.reduction %2, %0 : f32, !llvm.ptr
-    omp.yield
+  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %2 = arith.constant 2.0 : f32
+      omp.reduction %2, %0 : f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -577,11 +639,13 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) {
   %c1 = arith.constant 1 : i32
 
   // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}}
-  omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %2 = arith.constant 2.0 : f32
-    omp.reduction %2, %mem : f32, memref<1xf32>
-    omp.yield
+  omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %2 = arith.constant 2.0 : f32
+      omp.reduction %2, %mem : f32, memref<1xf32>
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -614,13 +678,15 @@ omp.critical.declare @mutex hint(invalid_hint)
 // -----
 
 func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop ordered(1)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
-    omp.ordered.region {
-      omp.terminator
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
+      omp.ordered.region {
+        omp.terminator
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
   return
 }
@@ -628,12 +694,15 @@ func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
 // -----
 
 func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
-    omp.ordered.region {
-      omp.terminator
+  omp.wsloop {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
+      omp.ordered.region {
+        omp.terminator
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
   return
 }
@@ -649,24 +718,28 @@ func.func @omp_ordered3(%vec0 : i64) -> () {
 // -----
 
 func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
-  omp.wsloop ordered(0)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
-    omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+  omp.wsloop ordered(0) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
 // -----
 
 func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () {
-  omp.wsloop ordered(1)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
-    omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
+      omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -1505,11 +1578,13 @@ func.func @omp_cancel2() {
 // -----
 
 func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop nowait
-    for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}}
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop nowait {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{A worksharing construct that is canceled must not have a `nowait` clause}}
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
@@ -1518,11 +1593,13 @@ func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
 // -----
 
 func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop ordered(1)
-    for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{A worksharing construct that is canceled must not have an `ordered` clause}}
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
@@ -1533,7 +1610,7 @@ func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
 func.func @omp_cancel5() -> () {
   omp.sections nowait {
     omp.section {
-      // expected-error @below {{A sections construct that is canceled must not have a nowait clause}}
+      // expected-error @below {{A sections construct that is canceled must not have a `nowait` clause}}
       omp.cancel cancellation_construct_type(sections)
       omp.terminator
     }
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index e34108d0a78fee..aa7123cddaab3a 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -135,8 +135,7 @@ func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_thre
 
 // CHECK-LABEL: omp_loop_nest
 func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}})
@@ -144,11 +143,10 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
     ^bb0(%iv2: index):
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
@@ -156,11 +154,10 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
     ^bb0(%iv2: index):
       omp.yield
     }) {inclusive} : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}, %{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
@@ -168,7 +165,7 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
     ^bb0(%iv2: index, %iv3: index):
       omp.yield
     }) : (index, index, index, index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
   return
@@ -176,231 +173,298 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
 
 // CHECK-LABEL: omp_loop_nest_pretty
 func.func @omp_loop_nest_pretty(%lb : index, %ub : index, %step : index) -> () {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}})
     omp.loop_nest (%iv2) : index = (%lb) to (%ub) step (%step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
     omp.loop_nest (%iv2) : index = (%lb) to (%ub) inclusive step (%step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
     omp.loop_nest (%iv2, %iv3) : index = (%lb, %lb) to (%ub, %ub) step (%step, %step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
   return
 }
 
-// CHECK-LABEL: omp_wsloop
-func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
+// CHECK-LABEL: omp_loop_nest_pretty_multi_block
+func.func @omp_loop_nest_pretty_multi_block(%lb : index, %ub : index,
+    %step : index, %data1 : memref<?xi32>, %data2 : memref<?xi32>) -> () {
 
-  // CHECK: omp.wsloop ordered(1)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step) ({
-    ^bb0(%iv: index):
-      omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0>, ordered_val = 1} :
-    (index, index, index) -> ()
-
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %1 = "test.payload"(%iv) : (index) -> (i32)
+      cf.br ^bb1(%1: i32)
+    ^bb1(%arg: i32):
+      memref.store %arg, %data1[%iv] : memref<?xi32>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,0>, schedule_val = #omp<schedulekind static>} :
-    (index, index, index, memref<i32>, i32) -> ()
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %c = "test.condition"(%iv) : (index) -> (i1)
+      %v1 = "test.payload"(%iv) : (index) -> (i32)
+      cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
+    ^bb1(%arg0: i32):
+      memref.store %arg0, %data1[%iv] : memref<?xi32>
+      cf.br ^bb3
+    ^bb2(%arg1: i32):
+      memref.store %arg1, %data2[%iv] : memref<?xi32>
+      cf.br ^bb3
+    ^bb3:
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,2,2,0,0>, schedule_val = #omp<schedulekind static>} :
-    (index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %c = "test.condition"(%iv) : (index) -> (i1)
+      %v1 = "test.payload"(%iv) : (index) -> (i32)
+      cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
+    ^bb1(%arg0: i32):
+      memref.store %arg0, %data1[%iv] : memref<?xi32>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,1>, schedule_val = #omp<schedulekind dynamic>, ordered_val = 2} :
-    (index, index, index, memref<i32>, i32, i32) -> ()
-
-  // CHECK: omp.wsloop schedule(auto) nowait
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step) ({
-    ^bb0(%iv: index):
+    ^bb2(%arg1: i32):
+      memref.store %arg1, %data2[%iv] : memref<?xi32>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0>, nowait, schedule_val = #omp<schedulekind auto>} :
-    (index, index, index) -> ()
+    }
+    omp.terminator
+  }
 
   return
 }
 
-// CHECK-LABEL: omp_wsloop_pretty
-func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
+// CHECK-LABEL: omp_loop_nest_pretty_non_index
+func.func @omp_loop_nest_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i64, %ub2 : i64, %step2 : i64, %data1 : memref<?xi32>,
+    %data2 : memref<?xi64>) -> () {
 
-  // CHECK: omp.wsloop ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      cf.br ^bb1(%1: index)
+    ^bb1(%arg1: index):
+      memref.store %iv1, %data1[%arg1] : memref<?xi32>
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop schedule(static) linear(%data_var = %linear_var : memref<i32>)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv2) : i64 = (%lb2) to (%ub2) step (%step2) {
+      %2 = "test.payload"(%iv2) : (i64) -> (index)
+      cf.br ^bb1(%2: index)
+    ^bb1(%arg2: index):
+      memref.store %iv2, %data2[%arg2] : memref<?xi64>
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  return
+}
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic)
-  for (%iv) : index = (%lb) to (%ub) step (%step)  {
-    omp.yield
-  }
+// CHECK-LABEL: omp_loop_nest_pretty_multiple
+func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
+    omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      %2 = "test.payload"(%iv2) : (i32) -> (index)
+      memref.store %iv1, %data1[%1] : memref<?xi32>
+      memref.store %iv2, %data1[%2] : memref<?xi32>
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  return
+}
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) inclusive step (%step) {
-    omp.yield
-  }
+// CHECK-LABEL: omp_wsloop
+func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
 
-  // CHECK: omp.wsloop nowait
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop nowait
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop ordered(1) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" () ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 0,0,0,0>, ordered_val = 1} :
+    () -> ()
 
-  // CHECK: omp.wsloop nowait order(concurrent)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop order(concurrent) nowait
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" (%data_var, %linear_var) ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 1,1,0,0>, schedule_val = #omp<schedulekind static>} :
+    (memref<i32>, i32) -> ()
+
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" (%data_var, %data_var, %linear_var, %linear_var) ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 2,2,0,0>, schedule_val = #omp<schedulekind static>} :
+    (memref<i32>, memref<i32>, i32, i32) -> ()
+
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" (%data_var, %linear_var, %chunk_var) ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 1,1,0,1>, schedule_val = #omp<schedulekind dynamic>, ordered_val = 2} :
+    (memref<i32>, i32, i32) -> ()
+
+  // CHECK: omp.wsloop schedule(auto) nowait {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" () ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 0,0,0,0>, nowait, schedule_val = #omp<schedulekind auto>} :
+    () -> ()
+
+  // CHECK: omp.wsloop {
+  // CHECK-NEXT: omp.simd
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" () ({
+    omp.simd {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }) : () -> ()
 
   return
 }
 
-// CHECK-LABEL: omp_wsloop_pretty_multi_block
-func.func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %data1 : memref<?xi32>, %data2 : memref<?xi32>) -> () {
+// CHECK-LABEL: omp_wsloop_pretty
+func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %1 = "test.payload"(%iv) : (index) -> (i32)
-    cf.br ^bb1(%1: i32)
-  ^bb1(%arg: i32):
-    memref.store %arg, %data1[%iv] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %c = "test.condition"(%iv) : (index) -> (i1)
-    %v1 = "test.payload"(%iv) : (index) -> (i32)
-    cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
-  ^bb1(%arg0: i32):
-    memref.store %arg0, %data1[%iv] : memref<?xi32>
-    cf.br ^bb3
-  ^bb2(%arg1: i32):
-    memref.store %arg1, %data2[%iv] : memref<?xi32>
-    cf.br ^bb3
-  ^bb3:
-    omp.yield
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop schedule(static) linear(%data_var = %linear_var : memref<i32>) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %c = "test.condition"(%iv) : (index) -> (i1)
-    %v1 = "test.payload"(%iv) : (index) -> (i32)
-    cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
-  ^bb1(%arg0: i32):
-    memref.store %arg0, %data1[%iv] : memref<?xi32>
-    omp.yield
-  ^bb2(%arg1: i32):
-    memref.store %arg1, %data2[%iv] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  return
-}
-
-// CHECK-LABEL: omp_wsloop_pretty_non_index
-func.func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i64, %ub2 : i64, %step2 : i64,
-                           %data1 : memref<?xi32>, %data2 : memref<?xi64>) -> () {
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step)  {
+      omp.yield
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
-    %1 = "test.payload"(%iv1) : (i32) -> (index)
-    cf.br ^bb1(%1: index)
-  ^bb1(%arg1: index):
-    memref.store %iv1, %data1[%arg1] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv2) : i64 = (%lb2) to (%ub2) step (%step2) {
-    %2 = "test.payload"(%iv2) : (i64) -> (index)
-    cf.br ^bb1(%2: index)
-  ^bb1(%arg2: index):
-    memref.store %iv2, %data2[%arg2] : memref<?xi64>
-    omp.yield
+  // CHECK: omp.wsloop {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  return
-}
+  // CHECK: omp.wsloop nowait {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop nowait {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }
 
-// CHECK-LABEL: omp_wsloop_pretty_multiple
-func.func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+  // CHECK: omp.wsloop nowait order(concurrent) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop order(concurrent) nowait {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop for (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
-  omp.wsloop for (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
-    %1 = "test.payload"(%iv1) : (i32) -> (index)
-    %2 = "test.payload"(%iv2) : (i32) -> (index)
-    memref.store %iv1, %data1[%1] : memref<?xi32>
-    memref.store %iv2, %data1[%2] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop {
+  // CHECK-NEXT: omp.simd
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop {
+    omp.simd {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
+      omp.terminator
+    }
+    omp.terminator
   }
 
   return
@@ -712,17 +776,19 @@ func.func @wsloop_reduction(%lb : index, %ub : index, %step : index) {
   %c1 = arith.constant 1 : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   // CHECK: reduction(@add_f32 %{{.+}} -> %[[PRV:.+]] : !llvm.ptr)
-  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    // CHECK: %[[CST:.+]] = arith.constant 2.0{{.*}} : f32
-    %cst = arith.constant 2.0 : f32
-    // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> f32
-    %lprv = llvm.load %prv : !llvm.ptr -> f32
-    // CHECK: %[[RES:.+]] = llvm.fadd %[[LPRV]], %[[CST]] : f32
-    %res = llvm.fadd %lprv, %cst: f32
-    // CHECK: llvm.store %[[RES]], %[[PRV]] :  f32, !llvm.ptr
-    llvm.store %res, %prv :  f32, !llvm.ptr
-    omp.yield
+  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: %[[CST:.+]] = arith.constant 2.0{{.*}} : f32
+      %cst = arith.constant 2.0 : f32
+      // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> f32
+      %lprv = llvm.load %prv : !llvm.ptr -> f32
+      // CHECK: %[[RES:.+]] = llvm.fadd %[[LPRV]], %[[CST]] : f32
+      %res = llvm.fadd %lprv, %cst: f32
+      // CHECK: llvm.store %[[RES]], %[[PRV]] :  f32, !llvm.ptr
+      llvm.store %res, %prv :  f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -749,14 +815,19 @@ func.func @parallel_wsloop_reduction(%lb : index, %ub : index, %step : index) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   // CHECK: omp.parallel reduction(@add_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) {
   omp.parallel reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
-    // CHECK: omp.wsloop for (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}})
-    omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-      %1 = arith.constant 2.0 : f32
-      %2 = llvm.load %prv : !llvm.ptr -> f32
-      // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
-      llvm.fadd %1, %2 : f32
-      // CHECK: omp.yield
-      omp.yield
+    // CHECK: omp.wsloop {
+    omp.wsloop {
+      // CHECK: omp.loop_nest (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        %1 = arith.constant 2.0 : f32
+        %2 = llvm.load %prv : !llvm.ptr -> f32
+        // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
+        llvm.fadd %1, %2 : f32
+        // CHECK: omp.yield
+        omp.yield
+      }
+      // CHECK: omp.terminator
+      omp.terminator
     }
     // CHECK: omp.terminator
     omp.terminator
@@ -879,16 +950,18 @@ combiner {
 // CHECK-LABEL: func @wsloop_reduction2
 func.func @wsloop_reduction2(%lb : index, %ub : index, %step : index) {
   %0 = memref.alloca() : memref<1xf32>
-  // CHECK: omp.wsloop reduction(@add2_f32 %{{.+}} -> %{{.+}} : memref<1xf32>)
-  omp.wsloop reduction(@add2_f32 %0 -> %prv : memref<1xf32>)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %1 = arith.constant 2.0 : f32
-    %2 = arith.constant 0 : index
-    %3 = memref.load %prv[%2] : memref<1xf32>
-    // CHECK: llvm.fadd
-    %4 = llvm.fadd %1, %3 : f32
-    memref.store %4, %prv[%2] : memref<1xf32>
-    omp.yield
+  // CHECK: omp.wsloop reduction(@add2_f32 %{{.+}} -> %{{.+}} : memref<1xf32>) {
+  omp.wsloop reduction(@add2_f32 %0 -> %prv : memref<1xf32>) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %1 = arith.constant 2.0 : f32
+      %2 = arith.constant 0 : index
+      %3 = memref.load %prv[%2] : memref<1xf32>
+      // CHECK: llvm.fadd
+      %4 = llvm.fadd %1, %3 : f32
+      memref.store %4, %prv[%2] : memref<1xf32>
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -915,14 +988,19 @@ func.func @parallel_wsloop_reduction2(%lb : index, %ub : index, %step : index) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   // CHECK: omp.parallel reduction(@add2_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) {
   omp.parallel reduction(@add2_f32 %0 -> %prv : !llvm.ptr) {
-    // CHECK: omp.wsloop for (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}})
-    omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-      %1 = arith.constant 2.0 : f32
-      %2 = llvm.load %prv : !llvm.ptr -> f32
-      // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
-      %3 = llvm.fadd %1, %2 : f32
-      // CHECK: omp.yield
-      omp.yield
+    // CHECK: omp.wsloop {
+    omp.wsloop {
+      // CHECK: omp.loop_nest (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        %1 = arith.constant 2.0 : f32
+        %2 = llvm.load %prv : !llvm.ptr -> f32
+        // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
+        %3 = llvm.fadd %1, %2 : f32
+        // CHECK: omp.yield
+        omp.yield
+      }
+      // CHECK: omp.terminator
+      omp.terminator
     }
     // CHECK: omp.terminator
     omp.terminator
@@ -996,36 +1074,44 @@ func.func @omp_ordered(%arg1 : i32, %arg2 : i32, %arg3 : i32,
     omp.terminator
   }
 
-  omp.wsloop ordered(0)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3)  {
-    omp.ordered.region {
-      omp.terminator
+  omp.wsloop ordered(0) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3)  {
+      // CHECK: omp.ordered.region
+      omp.ordered.region {
+        // CHECK: omp.terminator
+        omp.terminator
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  omp.wsloop ordered(1)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // Only one DEPEND(SINK: vec) clause
-    // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
-    omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // Only one DEPEND(SINK: vec) clause
+      // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
 
-    // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
-    omp.ordered depend_type(dependsource) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+      // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
+      omp.ordered depend_type(dependsource) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
 
-  omp.wsloop ordered(2)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // Multiple DEPEND(SINK: vec) clauses
-    // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
-    omp.ordered depend_type(dependsink) depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+  omp.wsloop ordered(2) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // Multiple DEPEND(SINK: vec) clauses
+      // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
 
-    // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64}
-    omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+      // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64}
+      omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
 
   return
@@ -1876,11 +1962,13 @@ func.func @omp_cancel_parallel(%if_cond : i1) -> () {
 }
 
 func.func @omp_cancel_wsloop(%lb : index, %ub : index, %step : index) {
-  omp.wsloop
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    // CHECK: omp.cancel cancellation_construct_type(loop)
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: omp.cancel cancellation_construct_type(loop)
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
@@ -1911,13 +1999,15 @@ func.func @omp_cancellationpoint_parallel() -> () {
 }
 
 func.func @omp_cancellationpoint_wsloop(%lb : index, %ub : index, %step : index) {
-  omp.wsloop
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    // CHECK: omp.cancellation_point cancellation_construct_type(loop)
-    omp.cancellation_point cancellation_construct_type(loop)
-    // CHECK: omp.cancel cancellation_construct_type(loop)
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: omp.cancellation_point cancellation_construct_type(loop)
+      omp.cancellation_point cancellation_construct_type(loop)
+      // CHECK: omp.cancel cancellation_construct_type(loop)
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
index 8ab50f05f07167..bb009ee62d8cc3 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -12,10 +12,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
       %loop_ub = llvm.mlir.constant(9 : i32) : i32
       %loop_lb = llvm.mlir.constant(0 : i32) : i32
       %loop_step = llvm.mlir.constant(1 : i32) : i32
-      omp.wsloop for  (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
-        %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
-        llvm.store %loop_cnt, %gep : i32, !llvm.ptr
-        omp.yield
+      omp.wsloop {
+        omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+          %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
+          llvm.store %loop_cnt, %gep : i32, !llvm.ptr
+          omp.yield
+        }
+        omp.terminator
       }
      omp.terminator
     }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index e246c551886cfa..6c735b77d893a0 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -8,13 +8,16 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(99 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : index) : i32
-    omp.wsloop for  (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
-      %1 = llvm.add %arg1, %arg2  : i32
-      %2 = llvm.mul %arg2, %loop_ub overflow<nsw>  : i32
-      %3 = llvm.add %arg1, %2 :i32
-      %4 = llvm.getelementptr %arg0[%3] : (!llvm.ptr, i32) -> !llvm.ptr, i32
-      llvm.store %1, %4 : i32, !llvm.ptr
-      omp.yield
+    omp.wsloop {
+      omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+        %1 = llvm.add %arg1, %arg2  : i32
+        %2 = llvm.mul %arg2, %loop_ub overflow<nsw>  : i32
+        %3 = llvm.add %arg1, %2 :i32
+        %4 = llvm.getelementptr %arg0[%3] : (!llvm.ptr, i32) -> !llvm.ptr, i32
+        llvm.store %1, %4 : i32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     llvm.return
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
index 220eb85b3483ec..f9400ec34a6c59 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -8,10 +8,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
       %loop_ub = llvm.mlir.constant(9 : i32) : i32
       %loop_lb = llvm.mlir.constant(0 : i32) : i32
       %loop_step = llvm.mlir.constant(1 : i32) : i32
-      omp.wsloop for  (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
-        %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
-        llvm.store %loop_cnt, %gep : i32, !llvm.ptr
-        omp.yield
+      omp.wsloop {
+        omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+          %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
+          llvm.store %loop_cnt, %gep : i32, !llvm.ptr
+          omp.yield
+        }
+        omp.terminator
       }
     llvm.return
   }
@@ -20,8 +23,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
       %loop_ub = llvm.mlir.constant(9 : i32) : i32
       %loop_lb = llvm.mlir.constant(0 : i32) : i32
       %loop_step = llvm.mlir.constant(1 : i32) : i32
-      omp.wsloop for  (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
-        omp.yield
+      omp.wsloop {
+        omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+          omp.yield
+        }
+        omp.terminator
       }
     llvm.return
   }
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index d1390022c1dc44..ad40ca26bec9f8 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -320,18 +320,20 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   omp.parallel {
-    "omp.wsloop"(%1, %0, %2) ({
-    ^bb0(%arg1: i64):
-      // The form of the emitted IR is controlled by OpenMPIRBuilder and
-      // tested there. Just check that the right functions are called.
-      // CHECK: call i32 @__kmpc_global_thread_num
-      // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @[[$loc_struct]],
-      %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
-      %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
-      llvm.store %3, %4 : f32, !llvm.ptr
-      omp.yield
+    "omp.wsloop"() ({
+      omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+        // The form of the emitted IR is controlled by OpenMPIRBuilder and
+        // tested there. Just check that the right functions are called.
+        // CHECK: call i32 @__kmpc_global_thread_num
+        // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @[[$loc_struct]],
+        %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+        %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+        llvm.store %3, %4 : f32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
       // CHECK: call void @__kmpc_for_static_fini(ptr @[[$loc_struct]],
-    }) {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
+    }) : () -> ()
     omp.terminator
   }
   llvm.return
@@ -345,13 +347,15 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   // CHECK: store i64 31, ptr %{{.*}}upperbound
-  "omp.wsloop"(%1, %0, %2) ({
-  ^bb0(%arg1: i64):
-    %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
-    %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
-    llvm.store %3, %4 : f32, !llvm.ptr
-    omp.yield
-  }) {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
+  "omp.wsloop"() ({
+    omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+      %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+      %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+      llvm.store %3, %4 : f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
+  }) : () -> ()
   llvm.return
 }
 
@@ -363,13 +367,15 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   // CHECK: store i64 32, ptr %{{.*}}upperbound
-  "omp.wsloop"(%1, %0, %2) ({
-  ^bb0(%arg1: i64):
-    %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
-    %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
-    llvm.store %3, %4 : f32, !llvm.ptr
-    omp.yield
-  }) {inclusive, operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
+  "omp.wsloop"() ({
+    omp.loop_nest (%arg1) : i64 = (%1) to (%0) inclusive step (%2) {
+      %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+      %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+      llvm.store %3, %4 : f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
+  }) : () -> ()
   llvm.return
 }
 
@@ -379,14 +385,16 @@ llvm.func @body(i32)
 
 // CHECK-LABEL: @test_omp_wsloop_static_defchunk
 llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () {
- omp.wsloop schedule(static)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-   // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 34, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 0)
-   // CHECK: call void @__kmpc_for_static_fini
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(static) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 34, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 0)
+      // CHECK: call void @__kmpc_for_static_fini
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -395,15 +403,17 @@ llvm.func @body(i32)
 
 // CHECK-LABEL: @test_omp_wsloop_static_1
 llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () {
- %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-   // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 1)
-   // CHECK: call void @__kmpc_for_static_fini
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop schedule(static = %static_chunk_size : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 1)
+      // CHECK: call void @__kmpc_for_static_fini
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -412,15 +422,17 @@ llvm.func @body(i32)
 
 // CHECK-LABEL: @test_omp_wsloop_static_2
 llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
- %static_chunk_size = llvm.mlir.constant(2 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-   // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 2)
-   // CHECK: call void @__kmpc_for_static_fini
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  %static_chunk_size = llvm.mlir.constant(2 : i32) : i32
+  omp.wsloop schedule(static = %static_chunk_size : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 2)
+      // CHECK: call void @__kmpc_for_static_fini
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -428,16 +440,18 @@ llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step)  {
-  // CHECK: call void @__kmpc_dispatch_init_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(dynamic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step)  {
+      // CHECK: call void @__kmpc_dispatch_init_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -445,17 +459,19 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64) -> () {
- %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
- omp.wsloop schedule(dynamic = %chunk_size_const : i16)
- for (%iv) : i64 = (%lb) to (%ub) step (%step)  {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
+  omp.wsloop schedule(dynamic = %chunk_size_const : i16) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step)  {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -463,20 +479,22 @@ llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64
 llvm.func @body(i32)
 
 llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32) -> () {
- %1 = llvm.mlir.constant(1 : i64) : i64
- %chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
- %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i16
- omp.wsloop schedule(dynamic = %chunk_size_var : i16)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-  // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
-  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  %1 = llvm.mlir.constant(1 : i64) : i64
+  %chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
+  %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i16
+  omp.wsloop schedule(dynamic = %chunk_size_var : i16) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
+      // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -484,20 +502,22 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
 llvm.func @body(i32)
 
 llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32) -> () {
- %1 = llvm.mlir.constant(1 : i64) : i64
- %chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
- %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i64
- omp.wsloop schedule(dynamic = %chunk_size_var : i64)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-  // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
-  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  %1 = llvm.mlir.constant(1 : i64) : i64
+  %chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
+  %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i64
+  omp.wsloop schedule(dynamic = %chunk_size_var : i64) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
+      // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -505,16 +525,18 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
 llvm.func @body(i32)
 
 llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
- omp.wsloop schedule(dynamic = %chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(dynamic = %chunk_size : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -522,16 +544,18 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(auto)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(auto) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -539,14 +563,16 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u
-    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-    // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-    llvm.call @body(%iv) : (i64) -> ()
-    omp.yield
+  omp.wsloop schedule(runtime) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -556,14 +582,16 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u
-    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-    // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-    llvm.call @body(%iv) : (i64) -> ()
-    omp.yield
+  omp.wsloop schedule(guided) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -573,14 +601,16 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, nonmonotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859
-    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-    // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-    llvm.call @body(%iv) : (i64) -> ()
-    omp.yield
+  omp.wsloop schedule(dynamic, nonmonotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -590,14 +620,16 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947
-    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-    // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-    llvm.call @body(%iv) : (i64) -> ()
-    omp.yield
+  omp.wsloop schedule(dynamic, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -607,14 +639,16 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871
-    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-    // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-    llvm.call @body(%iv) : (i64) -> ()
-    omp.yield
+  omp.wsloop schedule(runtime, simd) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -624,14 +658,16 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870
-    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-    // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-    llvm.call @body(%iv) : (i64) -> ()
-    omp.yield
+  omp.wsloop schedule(guided, simd) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -793,17 +829,19 @@ llvm.func @simd_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr {fi
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -811,17 +849,19 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(static) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(static) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -829,18 +869,20 @@ llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i32)
 
 llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
- %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
-  // CHECK: call void @__kmpc_dispatch_fini_4u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i32) -> ()
-   omp.yield
- }
- llvm.return
+  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
+      // CHECK: call void @__kmpc_dispatch_fini_4u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i32) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -848,17 +890,19 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(dynamic) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -866,17 +910,19 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(auto) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(auto) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -884,17 +930,19 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(runtime) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -902,17 +950,19 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(guided) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -920,17 +970,19 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -938,17 +990,19 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, monotonic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
-  // CHECK: call void @__kmpc_dispatch_fini_8u
-  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
-  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
-  // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
-   llvm.call @body(%iv) : (i64) -> ()
-   omp.yield
- }
- llvm.return
+  omp.wsloop schedule(dynamic, monotonic) ordered(0) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
+      // CHECK: call void @__kmpc_dispatch_fini_8u
+      // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+      // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+      // CHECK  br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+      llvm.call @body(%iv) : (i64) -> ()
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
 }
 
 // -----
@@ -1114,14 +1168,16 @@ llvm.func @collapse_wsloop(
     // CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
     // CHECK: store i32 %[[TOTAL_SUB_1]], ptr
     // CHECK: call void @__kmpc_for_static_init_4u
-    omp.wsloop
-    for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
-      %31 = llvm.load %20 : !llvm.ptr -> i32
-      %32 = llvm.add %31, %arg0 : i32
-      %33 = llvm.add %32, %arg1 : i32
-      %34 = llvm.add %33, %arg2 : i32
-      llvm.store %34, %20 : i32, !llvm.ptr
-      omp.yield
+    omp.wsloop {
+      omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+        %31 = llvm.load %20 : !llvm.ptr -> i32
+        %32 = llvm.add %31, %arg0 : i32
+        %33 = llvm.add %32, %arg1 : i32
+        %34 = llvm.add %33, %arg2 : i32
+        llvm.store %34, %20 : i32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -1175,14 +1231,16 @@ llvm.func @collapse_wsloop_dynamic(
     // CHECK: store i32 1, ptr
     // CHECK: store i32 %[[TOTAL]], ptr
     // CHECK: call void @__kmpc_dispatch_init_4u
-    omp.wsloop schedule(dynamic)
-    for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
-      %31 = llvm.load %20 : !llvm.ptr -> i32
-      %32 = llvm.add %31, %arg0 : i32
-      %33 = llvm.add %32, %arg1 : i32
-      %34 = llvm.add %33, %arg2 : i32
-      llvm.store %34, %20 : i32, !llvm.ptr
-      omp.yield
+    omp.wsloop schedule(dynamic) {
+      omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+        %31 = llvm.load %20 : !llvm.ptr -> i32
+        %32 = llvm.add %31, %arg0 : i32
+        %33 = llvm.add %32, %arg1 : i32
+        %34 = llvm.add %33, %arg2 : i32
+        llvm.store %34, %20 : i32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -1207,63 +1265,69 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
   // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_THREAD]])
   }
 
-  omp.wsloop ordered(0)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
-    // CHECK:  call void @__kmpc_ordered(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]])
-    omp.ordered.region  {
-      omp.terminator
-    // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB3]], i32 [[OMP_THREAD2]])
+  omp.wsloop ordered(0) {
+    omp.loop_nest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+      // CHECK:  call void @__kmpc_ordered(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]])
+      omp.ordered.region  {
+        omp.terminator
+      // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB3]], i32 [[OMP_THREAD2]])
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  omp.wsloop ordered(1)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
-    // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
-    // CHECK: store i64 [[ARG0:%.*]], ptr [[TMP]], align 8
-    // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
-    // CHECK: [[OMP_THREAD2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
-    // CHECK: call void @__kmpc_doacross_wait(ptr @[[GLOB3]], i32 [[OMP_THREAD2]], ptr [[TMP2]])
-    omp.ordered depend_type(dependsink) depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+      // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
+      // CHECK: store i64 [[ARG0:%.*]], ptr [[TMP]], align 8
+      // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
+      // CHECK: [[OMP_THREAD2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
+      // CHECK: call void @__kmpc_doacross_wait(ptr @[[GLOB3]], i32 [[OMP_THREAD2]], ptr [[TMP2]])
+      omp.ordered depend_type(dependsink) depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
 
-    // CHECK: [[TMP3:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR3]], i64 0, i64 0
-    // CHECK: store i64 [[ARG0]], ptr [[TMP3]], align 8
-    // CHECK: [[TMP4:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR3]], i64 0, i64 0
-    // CHECK: [[OMP_THREAD4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB5:[0-9]+]])
-    // CHECK: call void @__kmpc_doacross_post(ptr @[[GLOB5]], i32 [[OMP_THREAD4]], ptr [[TMP4]])
-    omp.ordered depend_type(dependsource) depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
+      // CHECK: [[TMP3:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR3]], i64 0, i64 0
+      // CHECK: store i64 [[ARG0]], ptr [[TMP3]], align 8
+      // CHECK: [[TMP4:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR3]], i64 0, i64 0
+      // CHECK: [[OMP_THREAD4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB5:[0-9]+]])
+      // CHECK: call void @__kmpc_doacross_post(ptr @[[GLOB5]], i32 [[OMP_THREAD4]], ptr [[TMP4]])
+      omp.ordered depend_type(dependsource) depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
 
-  omp.wsloop ordered(2)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
-    // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
-    // CHECK: store i64 [[ARG0]], ptr [[TMP5]], align 8
-    // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 1
-    // CHECK: store i64 [[ARG1:%.*]], ptr [[TMP6]], align 8
-    // CHECK: [[TMP7:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
-    // CHECK: [[OMP_THREAD6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]])
-    // CHECK: call void @__kmpc_doacross_wait(ptr @[[GLOB7]], i32 [[OMP_THREAD6]], ptr [[TMP7]])
-    // CHECK: [[TMP8:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR7]], i64 0, i64 0
-    // CHECK: store i64 [[ARG2:%.*]], ptr [[TMP8]], align 8
-    // CHECK: [[TMP9:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR7]], i64 0, i64 1
-    // CHECK: store i64 [[ARG3:%.*]], ptr [[TMP9]], align 8
-    // CHECK: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR7]], i64 0, i64 0
-    // CHECK: [[OMP_THREAD8:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7]])
-    // CHECK: call void @__kmpc_doacross_wait(ptr @[[GLOB7]], i32 [[OMP_THREAD8]], ptr [[TMP10]])
-    omp.ordered depend_type(dependsink) depend_vec(%arg3, %arg4, %arg5, %arg6 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+  omp.wsloop ordered(2) {
+    omp.loop_nest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+      // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
+      // CHECK: store i64 [[ARG0]], ptr [[TMP5]], align 8
+      // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 1
+      // CHECK: store i64 [[ARG1:%.*]], ptr [[TMP6]], align 8
+      // CHECK: [[TMP7:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
+      // CHECK: [[OMP_THREAD6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]])
+      // CHECK: call void @__kmpc_doacross_wait(ptr @[[GLOB7]], i32 [[OMP_THREAD6]], ptr [[TMP7]])
+      // CHECK: [[TMP8:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR7]], i64 0, i64 0
+      // CHECK: store i64 [[ARG2:%.*]], ptr [[TMP8]], align 8
+      // CHECK: [[TMP9:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR7]], i64 0, i64 1
+      // CHECK: store i64 [[ARG3:%.*]], ptr [[TMP9]], align 8
+      // CHECK: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR7]], i64 0, i64 0
+      // CHECK: [[OMP_THREAD8:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7]])
+      // CHECK: call void @__kmpc_doacross_wait(ptr @[[GLOB7]], i32 [[OMP_THREAD8]], ptr [[TMP10]])
+      omp.ordered depend_type(dependsink) depend_vec(%arg3, %arg4, %arg5, %arg6 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+
+      // CHECK: [[TMP11:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR9]], i64 0, i64 0
+      // CHECK: store i64 [[ARG0]], ptr [[TMP11]], align 8
+      // CHECK: [[TMP12:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR9]], i64 0, i64 1
+      // CHECK: store i64 [[ARG1]], ptr [[TMP12]], align 8
+      // CHECK: [[TMP13:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR9]], i64 0, i64 0
+      // CHECK: [[OMP_THREAD10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]])
+      // CHECK: call void @__kmpc_doacross_post(ptr @[[GLOB9]], i32 [[OMP_THREAD10]], ptr [[TMP13]])
+      omp.ordered depend_type(dependsource) depend_vec(%arg3, %arg4 : i64, i64) {num_loops_val = 2 : i64}
 
-    // CHECK: [[TMP11:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR9]], i64 0, i64 0
-    // CHECK: store i64 [[ARG0]], ptr [[TMP11]], align 8
-    // CHECK: [[TMP12:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR9]], i64 0, i64 1
-    // CHECK: store i64 [[ARG1]], ptr [[TMP12]], align 8
-    // CHECK: [[TMP13:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR9]], i64 0, i64 0
-    // CHECK: [[OMP_THREAD10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]])
-    // CHECK: call void @__kmpc_doacross_post(ptr @[[GLOB9]], i32 [[OMP_THREAD10]], ptr [[TMP13]])
-    omp.ordered depend_type(dependsource) depend_vec(%arg3, %arg4 : i64, i64) {num_loops_val = 2 : i64}
-
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
 
   llvm.return
@@ -2133,10 +2197,13 @@ llvm.func @omp_sections_with_clauses() -> () {
 // introduction mechanism itself is tested elsewhere.
 // CHECK-LABEL: @repeated_successor
 llvm.func @repeated_successor(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
-  omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2)  {
-    llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
-  ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
-    omp.yield
+  omp.wsloop {
+    omp.loop_nest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2)  {
+      llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
+    ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
diff --git a/mlir/test/Target/LLVMIR/openmp-nested.mlir b/mlir/test/Target/LLVMIR/openmp-nested.mlir
index e1fdfdd24a3cb0..ce5f22f10d7dce 100644
--- a/mlir/test/Target/LLVMIR/openmp-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-nested.mlir
@@ -11,20 +11,26 @@ module {
       %2 = llvm.mlir.constant(0 : index) : i64
       %4 = llvm.mlir.constant(0 : i32) : i32
       %12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
-      omp.wsloop for (%arg2) : i64 = (%2) to (%1) step (%0)  {
-        omp.parallel   {
-          omp.wsloop for (%arg3) : i64 = (%2) to (%0) step (%0)  {
-            llvm.store %2, %12 : i64, !llvm.ptr
-            omp.yield
+      omp.wsloop {
+        omp.loop_nest (%arg2) : i64 = (%2) to (%1) step (%0) {
+          omp.parallel {
+            omp.wsloop {
+              omp.loop_nest (%arg3) : i64 = (%2) to (%0) step (%0) {
+                llvm.store %2, %12 : i64, !llvm.ptr
+                omp.yield
+              }
+              omp.terminator
+            }
+            omp.terminator
           }
-          omp.terminator
+          %19 = llvm.load %12 : !llvm.ptr -> i64
+          %20 = llvm.trunc %19 : i64 to i32
+          %5 = llvm.mlir.addressof @str0 : !llvm.ptr
+          %6 = llvm.getelementptr %5[%4, %4] : (!llvm.ptr, i32, i32) -> !llvm.ptr, !llvm.array<29 x i8>
+          %21 = llvm.call @printf(%6, %20, %20) vararg(!llvm.func<i32 (ptr, ...)>): (!llvm.ptr, i32, i32) -> i32
+          omp.yield
         }
-        %19 = llvm.load %12 : !llvm.ptr -> i64
-        %20 = llvm.trunc %19 : i64 to i32
-        %5 = llvm.mlir.addressof @str0 : !llvm.ptr
-        %6 = llvm.getelementptr %5[%4, %4] : (!llvm.ptr, i32, i32) -> !llvm.ptr, !llvm.array<29 x i8>
-        %21 = llvm.call @printf(%6, %20, %20) vararg(!llvm.func<i32 (ptr, ...)>): (!llvm.ptr, i32, i32) -> i32
-        omp.yield
+        omp.terminator
       }
       omp.terminator
     }
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index 39b64d71a2274b..bfdad8c19335e1 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -26,13 +26,15 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
   %c1 = llvm.mlir.constant(1 : i32) : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-      %1 = llvm.mlir.constant(2.0 : f32) : f32
-      %2 = llvm.load %prv : !llvm.ptr -> f32
-      %3 = llvm.fadd %1, %2 : f32
-      llvm.store %3, %prv : f32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+        %1 = llvm.mlir.constant(2.0 : f32) : f32
+        %2 = llvm.load %prv : !llvm.ptr -> f32
+        %3 = llvm.fadd %1, %2 : f32
+        llvm.store %3, %prv : f32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -105,16 +107,18 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-      %1 = llvm.mlir.constant(2.0 : f32) : f32
-      %3 = llvm.load %prv0 : !llvm.ptr -> f32
-      %4 = llvm.fadd %3, %1 : f32
-      llvm.store %4, %prv0 : f32, !llvm.ptr
-      %5 = llvm.load %prv1 : !llvm.ptr -> f32
-      %6 = llvm.fadd %5, %1 : f32
-      llvm.store %6, %prv1 : f32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+        %1 = llvm.mlir.constant(2.0 : f32) : f32
+        %3 = llvm.load %prv0 : !llvm.ptr -> f32
+        %4 = llvm.fadd %3, %1 : f32
+        llvm.store %4, %prv0 : f32, !llvm.ptr
+        %5 = llvm.load %prv1 : !llvm.ptr -> f32
+        %6 = llvm.fadd %5, %1 : f32
+        llvm.store %6, %prv1 : f32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -195,13 +199,15 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-      %1 = llvm.mlir.constant(2.0 : f32) : f32
-      %3 = llvm.load %prv0 : !llvm.ptr -> f32
-      %4 = llvm.fadd %3, %1 : f32
-      llvm.store %4, %prv0 : f32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+        %1 = llvm.mlir.constant(2.0 : f32) : f32
+        %3 = llvm.load %prv0 : !llvm.ptr -> f32
+        %4 = llvm.fadd %3, %1 : f32
+        llvm.store %4, %prv0 : f32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -280,16 +286,18 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
   %c1 = llvm.mlir.constant(1 : i32) : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-      %1 = llvm.mlir.constant(2.0 : f32) : f32
-      %2 = llvm.load %prv : !llvm.ptr -> f32
-      %3 = llvm.fadd %2, %1 : f32
-      llvm.store %3, %prv : f32, !llvm.ptr
-      %4 = llvm.load %prv : !llvm.ptr -> f32
-      %5 = llvm.fadd %4, %1 : f32
-      llvm.store %5, %prv : f32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+        %1 = llvm.mlir.constant(2.0 : f32) : f32
+        %2 = llvm.load %prv : !llvm.ptr -> f32
+        %3 = llvm.fadd %2, %1 : f32
+        llvm.store %3, %prv : f32, !llvm.ptr
+        %4 = llvm.load %prv : !llvm.ptr -> f32
+        %5 = llvm.fadd %4, %1 : f32
+        llvm.store %5, %prv : f32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -374,16 +382,18 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-      %1 = llvm.mlir.constant(2.0 : f32) : f32
-      %3 = llvm.load %prv0 : !llvm.ptr -> f32
-      %4 = llvm.fadd %3, %1 : f32
-      llvm.store %4, %prv0 : f32, !llvm.ptr
-      %5 = llvm.load %prv1 : !llvm.ptr -> f32
-      %6 = llvm.fmul %5, %1 : f32
-      llvm.store %6, %prv1 : f32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+        %1 = llvm.mlir.constant(2.0 : f32) : f32
+        %3 = llvm.load %prv0 : !llvm.ptr -> f32
+        %4 = llvm.fadd %3, %1 : f32
+        llvm.store %4, %prv0 : f32, !llvm.ptr
+        %5 = llvm.load %prv1 : !llvm.ptr -> f32
+        %6 = llvm.fmul %5, %1 : f32
+        llvm.store %6, %prv1 : f32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
@@ -531,12 +541,15 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
   %step = llvm.mlir.constant(1 : i64) : i64
   
   omp.parallel reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
-    omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-      %ival = llvm.trunc %iv : i64 to i32
-      %lprv = llvm.load %prv : !llvm.ptr -> i32
-      %add = llvm.add %lprv, %ival : i32
-      llvm.store %add, %prv : i32, !llvm.ptr
-      omp.yield
+    omp.wsloop {
+      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+        %ival = llvm.trunc %iv : i64 to i32
+        %lprv = llvm.load %prv : !llvm.ptr -> i32
+        %add = llvm.add %lprv, %ival : i32
+        llvm.store %add, %prv : i32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }



More information about the llvm-branch-commits mailing list