[flang-commits] [flang] dc4bf2c - [flang][OpenMP]Make omp.wsloop arguments appear in memory (#1277)

Wed Jun 1 09:21:13 PDT 2022

Author: Mats Petersson
Date: 2022-06-01T17:20:06+01:00
New Revision: dc4bf2c33ce48ab03ba2a497e4d2c875162b0435

URL: https://github.com/llvm/llvm-project/commit/dc4bf2c33ce48ab03ba2a497e4d2c875162b0435
DIFF: https://github.com/llvm/llvm-project/commit/dc4bf2c33ce48ab03ba2a497e4d2c875162b0435.diff

LOG: [flang][OpenMP]Make omp.wsloop arguments appear in memory (#1277)

As per issue #1196, the loop induction variable, which is an argument
of the omp.wsloop operation, does not have a memory location. When it
is passed to a function or subroutine, what gets passed as the
reference is therefore not the address of a memory location but the
value of the induction variable itself. The callee function/subroutine
then tries to dereference memory at address 1 or some other "not a
good memory location".

This is fixed by creating a temporary memory location and storing the
value of the induction variable in that.

Test fixes as a consequence of the changed code generated.

Add checks to some of the tests in omp-unstructured.f90 for the alloca,
store and load operations, to ensure the correct flow. Add a test
for CYCLE inside an omp-do loop.

Also convert to use -emit-fir in the omp-unstructured test, and make
the symbol matching consistent in the omp-wsloop-variable test.

Reviewed By: peixin

Differential Revision: https://reviews.llvm.org/D126711

Added: 
    

Modified: 
    flang/lib/Lower/OpenMP.cpp
    flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
    flang/test/Lower/OpenMP/omp-unstructured.f90
    flang/test/Lower/OpenMP/omp-wsloop-chunks.f90
    flang/test/Lower/OpenMP/omp-wsloop-collapse.f90
    flang/test/Lower/OpenMP/omp-wsloop-variable.f90
    flang/test/Lower/OpenMP/omp-wsloop.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 85a54ebc412ca..32dd399dfa8de 100644

--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -13,6 +13,7 @@
 #include "flang/Lower/OpenMP.h"
 #include "flang/Common/idioms.h"
 #include "flang/Lower/Bridge.h"
+#include "flang/Lower/ConvertExpr.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/StatementContext.h"
 #include "flang/Lower/Todo.h"
@@ -184,6 +185,7 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter,
   // argument. Also update the symbol's address with the mlir argument value.
   // e.g. For loops the argument is the induction variable. And all further
   // uses of the induction variable should use this mlir value.
+  mlir::Operation *storeOp = nullptr;
   if (args.size()) {
     std::size_t loopVarTypeSize = 0;
     for (const Fortran::semantics::Symbol *arg : args)
@@ -197,14 +199,25 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter,
     }
     firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs);
     int argIndex = 0;
+    // The argument is not currently in memory, so make a temporary for the
+    // argument, and store it there, then bind that location to the argument.
     for (const Fortran::semantics::Symbol *arg : args) {
-      fir::ExtendedValue exval = op.getRegion().front().getArgument(argIndex);
-      converter.bindSymbol(*arg, exval);
+      mlir::Value val =
+          fir::getBase(op.getRegion().front().getArgument(argIndex));
+      mlir::Value temp = firOpBuilder.createTemporary(
+          loc, loopVarType,
+          llvm::ArrayRef<mlir::NamedAttribute>{
+              Fortran::lower::getAdaptToByRefAttr(firOpBuilder)});
+      storeOp = firOpBuilder.create<fir::StoreOp>(loc, val, temp);
+      converter.bindSymbol(*arg, temp);
       argIndex++;
     }
   } else {
     firOpBuilder.createBlock(&op.getRegion());
   }
+  // Set the insert for the terminator operation to go at the end of the
+  // block - this is either empty or the block with the stores above,
+  // the end of the block works for both.
   mlir::Block &block = op.getRegion().back();
   firOpBuilder.setInsertionPointToEnd(&block);
 
@@ -221,8 +234,11 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter,
     firOpBuilder.create<mlir::omp::TerminatorOp>(loc);
   }
 
-  // Reset the insertion point to the start of the first block.
-  firOpBuilder.setInsertionPointToStart(&block);
+  // Reset the insert point to before the terminator.
+  if (storeOp)
+    firOpBuilder.setInsertionPointAfter(storeOp);
+  else
+    firOpBuilder.setInsertionPointToStart(&block);
 
   // Handle privatization. Do not privatize if this is the outer operation.
   if (clauses && !outerCombined)

diff  --git a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
index 80bb1ca19daeb..67418a1320b09 100644
--- a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
@@ -12,7 +12,9 @@ subroutine simple_parallel_do
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO
   do i=1, 9
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -36,7 +38,9 @@ subroutine parallel_do_with_parallel_clauses(cond, nt)
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
   do i=1, 9
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -57,7 +61,9 @@ subroutine parallel_do_with_clauses(nt)
   ! CHECK:     omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
   do i=1, 9
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -82,7 +88,9 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   ! CHECK:    omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
   do i=1, 9
-  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
+  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
   ! CHECK:      %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
   ! CHECK:      %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
   ! CHECK:      fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1

diff  --git a/flang/test/Lower/OpenMP/omp-unstructured.f90 b/flang/test/Lower/OpenMP/omp-unstructured.f90
index 7272d0206f58f..28ed87169bbc6 100644
--- a/flang/test/Lower/OpenMP/omp-unstructured.f90
+++ b/flang/test/Lower/OpenMP/omp-unstructured.f90
@@ -1,17 +1,17 @@
 ! Test unstructured code adjacent to and inside OpenMP constructs.
 
-! RUN: bbc %s -fopenmp -o "-" | FileCheck %s
+! RUN: bbc %s -fopenmp -emit-fir -o "-" | FileCheck %s
 
 ! CHECK-LABEL: func @_QPss1{{.*}} {
 ! CHECK:   br ^bb1
-! CHECK: ^bb1:  // 2 preds: ^bb0, ^bb3
-! CHECK:   cond_br %{{[0-9]*}}, ^bb2, ^bb4
+! CHECK: ^bb1:  // 2 preds: ^bb0, ^bb4
+! CHECK:   cond_br %{{[0-9]*}}, ^bb2, ^bb5
 ! CHECK: ^bb2:  // pred: ^bb1
-! CHECK:   cond_br %{{[0-9]*}}, ^bb4, ^bb3
-! CHECK: ^bb3:  // pred: ^bb2
-! CHECK:   @_FortranAioBeginExternalListOutput
+! CHECK:   cond_br %{{[0-9]*}}, ^bb3, ^bb4
+! CHECK: ^bb4:  // pred: ^bb2
+! CHECK:   fir.call @_FortranAioBeginExternalListOutput
 ! CHECK:   br ^bb1
-! CHECK: ^bb4:  // 2 preds: ^bb1, ^bb2
+! CHECK: ^bb5:  // 2 preds: ^bb1, ^bb3
 ! CHECK:   omp.master  {
 ! CHECK:     @_FortranAioBeginExternalListOutput
 ! CHECK:     omp.terminator
@@ -33,14 +33,14 @@ subroutine ss1(n) ! unstructured code followed by a structured OpenMP construct
 ! CHECK:   omp.master  {
 ! CHECK:     @_FortranAioBeginExternalListOutput
 ! CHECK:     br ^bb1
-! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb3
-! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb4
+! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb4
+! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb5
 ! CHECK:   ^bb2:  // pred: ^bb1
-! CHECK:     cond_br %{{[0-9]*}}, ^bb4, ^bb3
+! CHECK:     cond_br %{{[0-9]*}}, ^bb3, ^bb4
 ! CHECK:   ^bb3:  // pred: ^bb2
 ! CHECK:     @_FortranAioBeginExternalListOutput
 ! CHECK:     br ^bb1
-! CHECK:   ^bb4:  // 2 preds: ^bb1, ^bb2
+! CHECK:   ^bb5:  // 2 preds: ^bb1, ^bb3
 ! CHECK:     omp.terminator
 ! CHECK:   }
 ! CHECK:   @_FortranAioBeginExternalListOutput
@@ -59,29 +59,38 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct
 end
 
 ! CHECK-LABEL: func @_QPss3{{.*}} {
-! CHECK:   omp.parallel  {
+! CHECK:     %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", {{.*}}}
+! CHECK:   omp.parallel {
+! CHECK:     %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned}
+! CHECK:     %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned}
 ! CHECK:     br ^bb1
-! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb2
-! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb3
+! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb3
+! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb4
 ! CHECK:   ^bb2:  // pred: ^bb1
-! CHECK:     omp.wsloop {{.*}} {
+! CHECK:     omp.wsloop for (%[[ARG1:.*]]) : {{.*}} {
+! CHECK:       fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref<i32>
 ! CHECK:     @_FortranAioBeginExternalListOutput
+! CHECK:       %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref<i32>
+! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]])
 ! CHECK:       omp.yield
 ! CHECK:     }
-! CHECK:     omp.wsloop {{.*}} {
+! CHECK:     omp.wsloop for (%[[ARG2:.*]]) : {{.*}} {
+! CHECK:       fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref<i32>
 ! CHECK:       br ^bb1
-! CHECK:     ^bb1:  // 2 preds: ^bb0, ^bb3
-! CHECK:       cond_br %{{[0-9]*}}, ^bb2, ^bb4
-! CHECK:     ^bb2:  // pred: ^bb1
-! CHECK:       cond_br %{{[0-9]*}}, ^bb4, ^bb3
+! CHECK:     ^bb2:  // 2 preds: ^bb1, ^bb5
+! CHECK:       cond_br %{{[0-9]*}}, ^bb3, ^bb6
 ! CHECK:     ^bb3:  // pred: ^bb2
+! CHECK:       cond_br %{{[0-9]*}}, ^bb4, ^bb5
+! CHECK:     ^bb4:  // pred: ^bb3
 ! CHECK:       @_FortranAioBeginExternalListOutput
-! CHECK:       br ^bb1
-! CHECK:     ^bb4:  // 2 preds: ^bb1, ^bb2
+! CHECK:       %[[LOAD_2:.*]] = fir.load %[[ALLOCA_K]] : !fir.ref<i32>
+! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_2]])
+! CHECK:       br ^bb2
+! CHECK:     ^bb6:  // 2 preds: ^bb2, ^bb4
 ! CHECK:       omp.yield
 ! CHECK:     }
 ! CHECK:     br ^bb1
-! CHECK:   ^bb3:  // pred: ^bb1
+! CHECK:   ^bb4:  // pred: ^bb1
 ! CHECK:     omp.terminator
 ! CHECK:   }
 ! CHECK: }
@@ -107,13 +116,17 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs
 
 ! CHECK-LABEL: func @_QPss4{{.*}} {
 ! CHECK:       omp.parallel {
+! CHECK:         %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned}
 ! CHECK:         omp.wsloop for (%[[ARG:.*]]) : {{.*}} {
-! CHECK:           cond_br %{{.*}}, ^bb1, ^bb2
-! CHECK:          ^bb1:
+! CHECK:           fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:           %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
+! CHECK:           %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
+! CHECK:          fir.if %[[COND_XOR]] {
 ! CHECK:           @_FortranAioBeginExternalListOutput
-! CHECK:           @_FortranAioOutputInteger32(%{{.*}}, %[[ARG]])
-! CHECK:           br ^bb2
-! CHECK:         ^bb2:
+! CHECK:           %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:           @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]])
+! CHECK:          } else {
+! CHECK:          }
 ! CHECK-NEXT:      omp.yield
 ! CHECK-NEXT:  }
 ! CHECK:       omp.terminator
@@ -136,12 +149,16 @@ subroutine ss4(n) ! CYCLE in OpenMP wsloop constructs
 ! CHECK:    omp.wsloop {{.*}} {
 ! CHECK:      br ^[[BB1:.*]]
 ! CHECK:    ^[[BB1]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]]
+! CHECK:      br ^[[BB2:.*]]
 ! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]]
+! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
 ! CHECK:    ^[[BB3]]:
-! CHECK:      br ^[[BB1]]
+! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB3:.*]]
 ! CHECK:    ^[[BB4]]:
+! CHECK:      br ^[[BB6]]
+! CHECK:    ^[[BB3]]:
+! CHECK:      br ^[[BB2]]
+! CHECK:    ^[[BB6]]:
 ! CHECK:      omp.yield
 ! CHECK:    }
 ! CHECK:    omp.terminator
@@ -172,12 +189,16 @@ subroutine ss5() ! EXIT inside OpenMP wsloop (inside parallel)
 ! CHECK:    omp.wsloop {{.*}} {
 ! CHECK:      br ^[[BB1:.*]]
 ! CHECK:    ^[[BB1]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]]
+! CHECK:      br ^[[BB2:.*]]
 ! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]]
+! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
 ! CHECK:    ^[[BB3]]:
-! CHECK:      br ^[[BB1]]
+! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
 ! CHECK:    ^[[BB4]]:
+! CHECK:      br ^[[BB6]]
+! CHECK:    ^[[BB5]]
+! CHECK:      br ^[[BB2]]
+! CHECK:    ^[[BB6]]:
 ! CHECK:      omp.yield
 ! CHECK:    }
 ! CHECK:    br ^[[BB1_OUTER]]
@@ -212,12 +233,16 @@ subroutine ss6() ! EXIT inside OpenMP wsloop in a do loop (inside parallel)
 ! CHECK:     omp.wsloop {{.*}} {
 ! CHECK:       br ^[[BB1:.*]]
 ! CHECK-NEXT:     ^[[BB1]]:
-! CHECK:       cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]]
+! CHECK:       br ^[[BB2:.*]]
 ! CHECK-NEXT:     ^[[BB2]]:
-! CHECK:       cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]]
+! CHECK:       cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
 ! CHECK-NEXT:     ^[[BB3]]:
-! CHECK:       br ^bb1
+! CHECK:       cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
 ! CHECK-NEXT:     ^[[BB4]]:
+! CHECK:       br ^[[BB6]]
+! CHECK-NEXT:     ^[[BB5]]:
+! CHECK:       br ^[[BB2]]
+! CHECK-NEXT:     ^[[BB6]]:
 ! CHECK:       omp.yield
 ! CHECK:     }
 ! CHECK:     omp.terminator
@@ -245,13 +270,17 @@ subroutine ss7() ! EXIT inside OpenMP parallel do (inside do loop)
 ! CHECK:  omp.parallel  {
 ! CHECK:    omp.wsloop {{.*}} {
 ! CHECK:      br ^[[BB1:.*]]
-! CHECK:    ^[[BB1]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]]
+! CHECK-NEXT:    ^[[BB1]]:
+! CHECK:      br ^[[BB2:.*]]
 ! CHECK:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]]
+! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB6:.*]]
 ! CHECK:    ^[[BB3]]:
-! CHECK:      br ^[[BB1]]
+! CHECK:      cond_br %{{.*}}, ^[[BB4:.*]], ^[[BB5:.*]]
 ! CHECK:    ^[[BB4]]:
+! CHECK-NEXT:    br ^[[BB6]]
+! CHECK:    ^[[BB5]]:
+! CHECK:      br ^[[BB2]]
+! CHECK-NEXT:    ^[[BB6]]:
 ! CHECK:      omp.yield
 ! CHECK:    }
 ! CHECK:    omp.terminator
@@ -275,12 +304,14 @@ subroutine ss8() ! EXIT inside OpenMP parallel do
 ! CHECK-NEXT:    omp.parallel  {
 ! CHECK:      br ^[[BB1:.*]]
 ! CHECK:         ^[[BB1]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB4:.*]]
+! CHECK:      cond_br %{{.*}}, ^[[BB2:.*]], ^[[BB5:.*]]
 ! CHECK-NEXT:    ^[[BB2]]:
-! CHECK:      cond_br %{{.*}}, ^[[BB4]], ^[[BB3:.*]]
+! CHECK:      cond_br %{{.*}}, ^[[BB3:.*]], ^[[BB4:.*]]
 ! CHECK-NEXT:    ^[[BB3]]:
-! CHECK:      br ^[[BB1]]
+! CHECK-NEXT:    br ^[[BB5]]
 ! CHECK-NEXT:    ^[[BB4]]:
+! CHECK:      br ^[[BB1]]
+! CHECK-NEXT:    ^[[BB5]]:
 ! CHECK:      omp.terminator
 ! CHECK-NEXT:    }
 ! CHECK:    omp.terminator

diff  --git a/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90 b/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90
index 879c15c41b984..25a53bb16273b 100644
--- a/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90
+++ b/flang/test/Lower/OpenMP/omp-wsloop-chunks.f90
@@ -19,8 +19,10 @@ program wsloop
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 4 : i32
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_5]] : i32) nowait for  (%[[VAL_6:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_6]]) : (!fir.ref<i8>, i32) -> i1
+! CHECK:         omp.wsloop   schedule(static = %[[VAL_5]] : i32) nowait for  (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
+! CHECK:           fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref<i32>
+! CHECK:           %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i32>
+! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
 ! CHECK:           omp.yield
 ! CHECK:         }
 
@@ -35,9 +37,11 @@ program wsloop
 ! CHECK:         %[[VAL_15:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_17:.*]] = arith.constant 4 : i32
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_17]] : i32) nowait for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
+! CHECK:         omp.wsloop   schedule(static = %[[VAL_17]] : i32) nowait for  (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
+! CHECK:           fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_24:.*]] = arith.constant 2 : i32
-! CHECK:           %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[VAL_18]] : i32
+! CHECK:           %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
+! CHECK:           %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32
 ! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) : (!fir.ref<i8>, i32) -> i1
 ! CHECK:           omp.yield
 ! CHECK:         }
@@ -57,9 +61,11 @@ program wsloop
 ! CHECK:         %[[VAL_30:.*]] = arith.constant 9 : i32
 ! CHECK:         %[[VAL_31:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
-! CHECK:         omp.wsloop   schedule(static = %[[VAL_32]] : i32) nowait for  (%[[VAL_33:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
+! CHECK:         omp.wsloop   schedule(static = %[[VAL_32]] : i32) nowait for  (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
+! CHECK:           fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_39:.*]] = arith.constant 3 : i32
-! CHECK:           %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[VAL_33]] : i32
+! CHECK:           %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
+! CHECK:           %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32
 ! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) : (!fir.ref<i8>, i32) -> i1
 ! CHECK:           omp.yield
 ! CHECK:         }

diff  --git a/flang/test/Lower/OpenMP/omp-wsloop-collapse.f90 b/flang/test/Lower/OpenMP/omp-wsloop-collapse.f90
index 8197909a9dd43..e66b2b8936edf 100644
--- a/flang/test/Lower/OpenMP/omp-wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/omp-wsloop-collapse.f90
@@ -39,11 +39,17 @@ program wsloop_collapse
   do i = 1, a
      do j= 1, b
         do k = 1, c
-! CHECK:           omp.wsloop collapse(3) for (%[[VAL_9:.*]], %[[VAL_10:.*]], %[[VAL_11:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
+! CHECK:           omp.wsloop collapse(3) for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
+! CHECK:             fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
+! CHECK:             fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
+! CHECK:             fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
 ! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_6]] : !fir.ref<i32>
-! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_9]] : i32
-! CHECK:             %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_10]] : i32
-! CHECK:             %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_11]] : i32
+! CHECK:             %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[LOAD_IV0]] : i32
+! CHECK:             %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
+! CHECK:             %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[LOAD_IV1]] : i32
+! CHECK:             %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
+! CHECK:             %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[LOAD_IV2]] : i32
 ! CHECK:             fir.store %[[VAL_15]] to %[[VAL_6]] : !fir.ref<i32>
 ! CHECK:             omp.yield
 ! CHECK:           }

diff  --git a/flang/test/Lower/OpenMP/omp-wsloop-variable.f90 b/flang/test/Lower/OpenMP/omp-wsloop-variable.f90
index 2bffcd77dae27..261088c4df740 100644
--- a/flang/test/Lower/OpenMP/omp-wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/omp-wsloop-variable.f90
@@ -14,18 +14,22 @@ program wsloop_variable
   integer(kind=16) :: i16, i16_lb
   real :: x
 
-!CHECK:  [[TMP0:%.*]] = arith.constant 1 : i32
-!CHECK:  [[TMP1:%.*]] = arith.constant 100 : i32
-!CHECK:  [[TMP2:%.*]] = fir.convert [[TMP0]] : (i32) -> i64
-!CHECK:  [[TMP3:%.*]] = fir.convert %{{.*}} : (i8) -> i64
-!CHECK:  [[TMP4:%.*]] = fir.convert %{{.*}} : (i16) -> i64
-!CHECK:  [[TMP5:%.*]] = fir.convert %{{.*}} : (i128) -> i64
-!CHECK:  [[TMP6:%.*]] = fir.convert [[TMP1]] : (i32) -> i64
-!CHECK:  [[TMP7:%.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:  omp.wsloop collapse(2) for ([[TMP8:%.*]], [[TMP9:%.*]]) : i64 = ([[TMP2]], [[TMP5]]) to ([[TMP3]], [[TMP6]]) inclusive step ([[TMP4]], [[TMP7]]) {
-!CHECK:    [[TMP10:%.*]] = arith.addi [[TMP8]], [[TMP9]] : i64
-!CHECK:    [[TMP11:%.*]] = fir.convert [[TMP10]] : (i64) -> f32
-!CHECK:    fir.store [[TMP11]] to %{{.*}} : !fir.ref<f32>
+!CHECK:  %[[TMP0:.*]] = arith.constant 1 : i32
+!CHECK:  %[[TMP1:.*]] = arith.constant 100 : i32
+!CHECK:  %[[TMP2:.*]] = fir.convert %[[TMP0]] : (i32) -> i64
+!CHECK:  %[[TMP3:.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:  %[[TMP4:.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:  %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
+!CHECK:  %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
+!CHECK:  %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:  omp.wsloop collapse(2) for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK:    fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i64>
+!CHECK:    fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
+!CHECK:    %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i64>
+!CHECK:    %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i64>
+!CHECK:    %[[TMP10:.*]] = arith.addi %[[LOAD_IV0]], %[[LOAD_IV1]] : i64
+!CHECK:    %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32
+!CHECK:    fir.store %[[TMP11]] to %{{.*}} : !fir.ref<f32>
 !CHECK:    omp.yield
 !CHECK:  }
 
@@ -37,12 +41,15 @@ program wsloop_variable
   end do
   !$omp end do
 
-!CHECK:  [[TMP12:%.*]] = arith.constant 1 : i32
-!CHECK:  [[TMP13:%.*]] = fir.convert %{{.*}} : (i8) -> i32
-!CHECK:  [[TMP14:%.*]] = fir.convert %{{.*}} : (i64) -> i32
-!CHECK:  omp.wsloop for ([[TMP15:%.*]]) : i32 = ([[TMP12]]) to ([[TMP13]]) inclusive step ([[TMP14]])  {
-!CHECK:    [[TMP16:%.*]] = fir.convert [[TMP15]] : (i32) -> f32
-!CHECK:    fir.store [[TMP16]] to %{{.*}} : !fir.ref<f32>
+!CHECK:  %[[TMP12:.*]] = arith.constant 1 : i32
+!CHECK:  %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
+!CHECK:  %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
+!CHECK:  omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]])  {
+!CHECK:    fir.store %[[ARG0]] to %[[STORE3:.*]] : !fir.ref<i32>
+!CHECK:    %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref<i32>
+!CHECK:    %[[TMP16:.*]] = fir.convert %[[LOAD3]] : (i32) -> f32
+
+!CHECK:    fir.store %[[TMP16]] to %{{.*}} : !fir.ref<f32>
 !CHECK:    omp.yield
 !CHECK:  }
 
@@ -52,12 +59,14 @@ program wsloop_variable
   end do
   !$omp end do
 
-!CHECK:  [[TMP17:%.*]] = fir.convert %{{.*}} : (i8) -> i64
-!CHECK:  [[TMP18:%.*]] = fir.convert %{{.*}} : (i16) -> i64
-!CHECK:  [[TMP19:%.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK:  omp.wsloop for ([[TMP20:%.*]]) : i64 = ([[TMP17]]) to ([[TMP18]]) inclusive step ([[TMP19]])  {
-!CHECK:    [[TMP21:%.*]] = fir.convert [[TMP20]] : (i64) -> f32
-!CHECK:    fir.store [[TMP21]] to %{{.*}} : !fir.ref<f32>
+!CHECK:  %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:  %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:  %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:  omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]])  {
+!CHECK:    fir.store %[[ARG1]] to %[[STORE4:.*]] : !fir.ref<i64>
+!CHECK:    %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref<i64>
+!CHECK:    %[[TMP21:.*]] = fir.convert %[[LOAD4]] : (i64) -> f32
+!CHECK:    fir.store %[[TMP21]] to %{{.*}} : !fir.ref<f32>
 !CHECK:    omp.yield
 !CHECK:  }
 
@@ -82,7 +91,8 @@ end program wsloop_variable
 !CHECK:         %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i16>
 !CHECK:         %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32
 !CHECK:         %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32
-!CHECK:         omp.wsloop   for  (%[[VAL_12:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+!CHECK:         omp.wsloop   for  (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+!CHECK:           fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref<i32>
 !CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref<i128>
 !CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i128) -> index
 !CHECK:           %[[VAL_15:.*]] = arith.constant 100 : i32
@@ -92,7 +102,8 @@ end program wsloop_variable
 !CHECK:           %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_18]] -> index {
 !CHECK:             %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64
 !CHECK:             fir.store %[[VAL_21]] to %[[VAL_5]] : !fir.ref<i64>
-!CHECK:             %[[VAL_22:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+!CHECK:             %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i32>
+!CHECK:             %[[VAL_22:.*]] = fir.convert %[[LOAD_IV]] : (i32) -> i64
 !CHECK:             %[[VAL_23:.*]] = fir.load %[[VAL_5]] : !fir.ref<i64>
 !CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_22]], %[[VAL_23]] : i64
 !CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> f32

diff  --git a/flang/test/Lower/OpenMP/omp-wsloop.f90 b/flang/test/Lower/OpenMP/omp-wsloop.f90
index 52de0a2a09c8d..619ec90eb705d 100644
--- a/flang/test/Lower/OpenMP/omp-wsloop.f90
+++ b/flang/test/Lower/OpenMP/omp-wsloop.f90
@@ -7,13 +7,16 @@ subroutine simple_loop
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
+  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
   ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP DO
   do i=1, 9
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:             fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
+  ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -27,13 +30,16 @@ subroutine simple_loop_with_step
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
+  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
   ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:     %[[WS_STEP:.*]] = arith.constant 2 : i32
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
   !$OMP DO
   do i=1, 9, 2
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -47,13 +53,16 @@ subroutine loop_with_schedule_nowait
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
+  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
   ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
   ! CHECK:     omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+  ! CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield