[flang-commits] [flang] 2b60ed4 - [flang] Use Assign() runtime for copy-in/copy-out.

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Wed Dec 21 09:55:42 PST 2022


Author: Slava Zakharin
Date: 2022-12-21T09:55:33-08:00
New Revision: 2b60ed405b8110b20ab2e383839759ea34003127

URL: https://github.com/llvm/llvm-project/commit/2b60ed405b8110b20ab2e383839759ea34003127
DIFF: https://github.com/llvm/llvm-project/commit/2b60ed405b8110b20ab2e383839759ea34003127.diff

LOG: [flang] Use Assign() runtime for copy-in/copy-out.

The loops generated under the IsContiguous check for copy-in/copy-out
result in the LLVM backend spending too much time optimizing them.
At the same time, the copy loops do not provide any optimization
opportunities with the surrounding code (since they are executed
under a runtime IsContiguous check), so the copy code may be optimized
on its own, and this can be done in the runtime library.

I thought I could implement and use new APIs for packing/unpacking
non-contiguous data (interfaces added in D136378), but then I found
that Assign() is already doing what is needed. If performance
becomes an issue for these loops, we can optimize code in Assign()
rather than creating new APIs.

Thus, this change makes use of Assign() for copy-in/copy-out
of boxed objects, and this is done only if the objects
are non-contiguous during execution. Copies for non-boxed
objects (e.g. for passing as VALUE dummy argument) are still
done inline, because they can potentially be optimized with
surrounding loops.

I added an internal -inline-copyinout-for-boxes option to revert to the old
behavior just to make it easier to triage performance regressions,
if any appear after the change.

Without the change, CPU2017/521.wrf takes 2179 seconds to compile, and
that is with module_dm.f90 compiled at -O0 (without -O0 this single
module alone takes 5775 seconds to compile). With the change, the total
compilation time of the benchmark drops to 722 seconds.

Differential Revision: https://reviews.llvm.org/D140446

Added: 
    

Modified: 
    flang/lib/Lower/ConvertExpr.cpp
    flang/test/Lower/call-by-value-attr.f90
    flang/test/Lower/call-copy-in-out.f90
    flang/test/Lower/dummy-argument-assumed-shape-optional.f90
    flang/test/Lower/dummy-argument-optional-2.f90
    flang/test/Lower/optional-value-caller.f90
    flang/test/Lower/parent-component.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
index 619430402fb5c..a5ff7dcd12a17 100644
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -35,6 +35,7 @@
 #include "flang/Optimizer/Builder/Character.h"
 #include "flang/Optimizer/Builder/Complex.h"
 #include "flang/Optimizer/Builder/Factory.h"
+#include "flang/Optimizer/Builder/Runtime/Assign.h"
 #include "flang/Optimizer/Builder/Runtime/Character.h"
 #include "flang/Optimizer/Builder/Runtime/Derived.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
@@ -105,6 +106,26 @@ static llvm::cl::opt<bool> optimizeTranspose(
     llvm::cl::desc("lower transpose without using a runtime call"),
     llvm::cl::init(true));
 
+// When copy-in/copy-out is generated for a boxed object we may
+// either produce loops to copy the data or call the Fortran runtime's
+// Assign function. Since the data copy happens under a runtime check
+// (for IsContiguous), the copy loops can hardly provide any value
+// to optimizations; instead, the optimizer just wastes compilation
+// time on these loops.
+//
+// When set to true, this internal option forces generation of the
+// loops. It is false by default.
+//
+// Note that for copy-in/copy-out of non-boxed objects (e.g. for passing
+// arguments by value) we always generate loops. Since the memory for
+// such objects is contiguous, it may be better to expose them
+// to the optimizer.
+static llvm::cl::opt<bool> inlineCopyInOutForBoxes(
+    "inline-copyinout-for-boxes",
+    llvm::cl::desc(
+        "generate loops for copy-in/copy-out of objects with descriptors"),
+    llvm::cl::init(false));
+
 /// The various semantics of a program constituent (or a part thereof) as it may
 /// appear in an expression.
 ///
@@ -2269,8 +2290,20 @@ class ScalarExprLowering {
 
     auto doCopyIn = [&]() -> ExtValue {
       ExtValue temp = genArrayTempFromMold(actualArg, tempName);
-      if (arg.mayBeReadByCall())
+      if (!arg.mayBeReadByCall()) {
+        return temp;
+      }
+      if (!isActualArgBox || inlineCopyInOutForBoxes) {
         genArrayCopy(temp, actualArg);
+        return temp;
+      }
+
+      // Generate Assign() call to copy data from the actualArg
+      // to a temporary.
+      mlir::Value destBox = fir::getBase(builder.createBox(loc, temp));
+      mlir::Value boxRef = builder.createTemporary(loc, destBox.getType());
+      builder.create<fir::StoreOp>(loc, destBox, boxRef);
+      fir::runtime::genAssign(builder, loc, boxRef, fir::getBase(actualArg));
       return temp;
     };
 
@@ -2366,17 +2399,38 @@ class ScalarExprLowering {
   /// has been copied-in into a contiguous temp.
   void genCopyOut(const CopyOutPair &copyOutPair) {
     mlir::Location loc = getLoc();
-    if (!copyOutPair.restrictCopyAndFreeAtRuntime) {
-      if (copyOutPair.argMayBeModifiedByCall)
+    bool isActualArgBox =
+        fir::isa_box_type(fir::getBase(copyOutPair.var).getType());
+    auto doCopyOut = [&]() {
+      if (!copyOutPair.argMayBeModifiedByCall) {
+        return;
+      }
+      if (!isActualArgBox || inlineCopyInOutForBoxes) {
         genArrayCopy(copyOutPair.var, copyOutPair.temp);
+        return;
+      }
+      // Generate Assign() call to copy data from the temporary
+      // to the actualArg. Note that in case the actual argument
+      // is ALLOCATABLE/POINTER the Assign() implementation
+      // should not engage its reallocation, because the temporary
+      // is rank, shape and type compatible with it.
+      mlir::Value srcBox =
+          fir::getBase(builder.createBox(loc, copyOutPair.temp));
+      mlir::Value destBox =
+          fir::getBase(builder.createBox(loc, copyOutPair.var));
+      mlir::Value destBoxRef = builder.createTemporary(loc, destBox.getType());
+      builder.create<fir::StoreOp>(loc, destBox, destBoxRef);
+      fir::runtime::genAssign(builder, loc, destBoxRef, srcBox);
+    };
+    if (!copyOutPair.restrictCopyAndFreeAtRuntime) {
+      doCopyOut();
       builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
       return;
     }
 
     builder.genIfThen(loc, *copyOutPair.restrictCopyAndFreeAtRuntime)
         .genThen([&]() {
-          if (copyOutPair.argMayBeModifiedByCall)
-            genArrayCopy(copyOutPair.var, copyOutPair.temp);
+          doCopyOut();
           builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
         })
         .end();

diff  --git a/flang/test/Lower/call-by-value-attr.f90 b/flang/test/Lower/call-by-value-attr.f90
index 5e00af8111375..1e4ca4cd03c39 100644
--- a/flang/test/Lower/call-by-value-attr.f90
+++ b/flang/test/Lower/call-by-value-attr.f90
@@ -74,11 +74,11 @@ end subroutine subra
   !CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[BOX]], %[[CONST_0]] : (!fir.box<!fir.array<11xi32>>, index) -> (index, index, index)
   !CHECK: %[[ARRAY_COPY_2:.*]] = fir.allocmem !fir.array<11xi32>, %[[DIMS]]#1 {uniq_name = ".copy"}
   !CHECK: %[[SHAPE_8:.*]] = fir.shape %[[DIMS]]#1 : (index) -> !fir.shape<1>
-  !CHECK: %[[ARRAY_LOAD_7:.*]] = fir.array_load %[[ARRAY_COPY_2]](%[[SHAPE_8]]) : (!fir.heap<!fir.array<11xi32>>, !fir.shape<1>) -> !fir.array<11xi32>
-  !CHECK: %[[ARRAY_LOAD_8:.*]] = fir.array_load %[[BOX]] : (!fir.box<!fir.array<11xi32>>) -> !fir.array<11xi32>
-  !CHECK: %[[DO_4:.*]] = fir.do_loop {{.*}} {
-  !CHECK: }
-  !CHECK: fir.array_merge_store %[[ARRAY_LOAD_7]], %[[DO_4]] to %[[ARRAY_COPY_2]] : !fir.array<11xi32>, !fir.array<11xi32>, !fir.heap<!fir.array<11xi32>>
+  !CHECK: %[[TEMP_BOX:.*]] = fir.embox %[[ARRAY_COPY_2]](%[[SHAPE_8]]) : (!fir.heap<!fir.array<11xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<11xi32>>
+  !CHECK: fir.store %[[TEMP_BOX]] to %[[TEMP_BOX_LOC:.*]] : !fir.ref<!fir.box<!fir.array<11xi32>>>
+  !CHECK: %[[TEMP_BOX_ADDR:.*]] = fir.convert %[[TEMP_BOX_LOC]] : (!fir.ref<!fir.box<!fir.array<11xi32>>>) -> !fir.ref<!fir.box<none>>
+  !CHECK: %[[BOX_ADDR:.*]] = fir.convert %[[BOX]] : (!fir.box<!fir.array<11xi32>>) -> !fir.box<none>
+  !CHECK: fir.call @_FortranAAssign(%[[TEMP_BOX_ADDR]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}){{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
   !CHECK: fir.result %[[ARRAY_COPY_2]] : !fir.heap<!fir.array<11xi32>>
   !CHECK: %[[CONVERT_B:.*]] = fir.convert %[[ADDR]] : (!fir.heap<!fir.array<11xi32>>) -> !fir.ref<!fir.array<10xi32>>
   !CHECK: fir.call @_QPsubra(%[[CONVERT_B]])

diff  --git a/flang/test/Lower/call-copy-in-out.f90 b/flang/test/Lower/call-copy-in-out.f90
index d321006eef38a..fcf0abc41183c 100644
--- a/flang/test/Lower/call-copy-in-out.f90
+++ b/flang/test/Lower/call-copy-in-out.f90
@@ -19,14 +19,11 @@ subroutine test_assumed_shape_to_array(x)
 
 ! Copy-in
 ! CHECK-DAG:  %[[shape:.*]] = fir.shape %[[dim]]#1 : (index) -> !fir.shape<1>
-! CHECK-DAG:  %[[temp_load:.*]] = fir.array_load %[[temp]](%[[shape]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.array<?xf32>
-! CHECK-DAG:  %[[x_load:.*]] = fir.array_load %[[x]] : (!fir.box<!fir.array<?xf32>>) -> !fir.array<?xf32>
-! CHECK:  %[[copyin:.*]] = fir.do_loop %[[i:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[res:.*]] = %[[temp_load]]) -> (!fir.array<?xf32>) {
-! CHECK:    %[[fetch:.*]] = fir.array_fetch %[[x_load]], %[[i]] : (!fir.array<?xf32>, index) -> f32
-! CHECK:    %[[update:.*]] = fir.array_update %[[res]], %[[fetch]], %[[i]] : (!fir.array<?xf32>, f32, index) -> !fir.array<?xf32>
-! CHECK:    fir.result %[[update]] : !fir.array<?xf32>
-! CHECK:  }
-! CHECK:  fir.array_merge_store %[[temp_load]], %[[copyin:.*]] to %[[temp]] : !fir.array<?xf32>, !fir.array<?xf32>, !fir.heap<!fir.array<?xf32>>
+! CHECK-DAG:  %[[temp_box:.*]] = fir.embox %[[temp]](%[[shape]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+! CHECK-DAG:  fir.store %[[temp_box]] to %[[temp_box_loc:.*]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+! CHECK-DAG: %[[temp_box_addr:.*]] = fir.convert %[[temp_box_loc]] : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<none>>
+! CHECK-DAG: %[[arg_box:.*]] = fir.convert %[[x]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+! CHECK-DAG: fir.call @_FortranAAssign(%[[temp_box_addr]], %[[arg_box]], %{{.*}}, %{{.*}}){{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
 ! CHECK:  fir.result %[[temp]] : !fir.heap<!fir.array<?xf32>>
 
 ! CHECK:  %[[dim:.*]]:3 = fir.box_dims %[[x]], %c0{{.*}} : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
@@ -34,18 +31,12 @@ subroutine test_assumed_shape_to_array(x)
 ! CHECK:  fir.call @_QPbar(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<?xf32>>) -> ()
 
 ! Copy-out
-! CHECK-DAG:  %[[x_load:.*]] = fir.array_load %[[x]] : (!fir.box<!fir.array<?xf32>>) -> !fir.array<?xf32>
-! CHECK-DAG:  %[[c0:.*]] = arith.constant 0 : index
-
 ! CHECK-DAG:  %[[shape:.*]] = fir.shape %[[dim]]#1 : (index) -> !fir.shape<1>
-! CHECK-DAG:  %[[temp_load:.*]] = fir.array_load %[[addr]](%[[shape]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.array<?xf32>
-! CHECK:  %[[copyout:.*]] = fir.do_loop %[[i:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[res:.*]] = %[[x_load]]) -> (!fir.array<?xf32>) {
-! CHECK:    %[[fetch:.*]] = fir.array_fetch %[[temp_load]], %[[i]] : (!fir.array<?xf32>, index) -> f32
-! CHECK:    %[[update:.*]] = fir.array_update %[[res]], %[[fetch]], %[[i]] : (!fir.array<?xf32>, f32, index) -> !fir.array<?xf32>
-! CHECK:    fir.result %[[update]] : !fir.array<?xf32>
-! CHECK:  }
-! CHECK:  fir.array_merge_store %[[x_load]], %[[copyout:.*]] to %[[x]] : !fir.array<?xf32>, !fir.array<?xf32>, !fir.box<!fir.array<?xf32>>
-
+! CHECK-DAG:  %[[temp_box:.*]] = fir.embox %[[addr]](%[[shape]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+! CHECK-DAG:  fir.store %[[x]] to %[[arg_box_loc:.*]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+! CHECK-DAG: %[[arg_box_addr:.*]] = fir.convert %[[arg_box_loc]] : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<none>>
+! CHECK-DAG: %[[temp_box_cast:.*]] = fir.convert %[[temp_box]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+! CHECK-DAG: fir.call @_FortranAAssign(%[[arg_box_addr]], %[[temp_box_cast]], %{{.*}}, %{{.*}}){{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
 ! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 
   call bar(x)
@@ -66,7 +57,7 @@ subroutine eval_expr_only_once(x)
 
 ! CHECK: %[[temp:.*]] = fir.allocmem !fir.array<?xf32>
 ! CHECK-NOT: fir.call @_QPonly_once()
-! CHECK:  fir.array_merge_store %{{.*}}, %{{.*}} to %[[temp]]
+! CHECK:  fir.call @_FortranAAssign
 ! CHECK-NOT: fir.call @_QPonly_once()
 
 ! CHECK:  %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
@@ -74,7 +65,7 @@ subroutine eval_expr_only_once(x)
   call bar(x(1:200:only_once()))
 
 ! CHECK-NOT: fir.call @_QPonly_once()
-! CHECK:  fir.array_merge_store %{{.*}}, %{{.*}} to %[[x_section]]
+! CHECK:  fir.call @_FortranAAssign
 ! CHECK-NOT: fir.call @_QPonly_once()
 
 ! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
@@ -86,10 +77,10 @@ subroutine eval_expr_only_once(x)
 subroutine test_contiguous(x)
   real, contiguous :: x(:)
 ! CHECK: %[[addr:.*]] = fir.box_addr %[[x]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
-! CHECK-NOT:  fir.array_merge_store
+! CHECK-NOT:  fir.call @_FortranAAssign
 ! CHECK: fir.call @_QPbar(%[[addr]]) {{.*}}: (!fir.ref<!fir.array<?xf32>>) -> ()
   call bar(x)
-! CHECK-NOT:  fir.array_merge_store
+! CHECK-NOT:  fir.call @_FortranAAssign
 ! CHECK: return
 end subroutine
 
@@ -104,7 +95,7 @@ subroutine test_parenthesis(x)
 ! CHECK:  %[[cast:.*]] = fir.convert %[[temp]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPbar(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<?xf32>>) -> ()
   call bar((x))
-! CHECK-NOT:  fir.array_merge_store
+! CHECK-NOT:  fir.call @_FortranAAssign
 ! CHECK: fir.freemem %[[temp]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
@@ -125,14 +116,14 @@ subroutine bar_intent_out(x)
 ! CHECK: } else {
 ! CHECK: %[[dim:.*]]:3 = fir.box_dims %[[x]], %c0{{.*}} : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 ! CHECK: %[[temp:.*]] = fir.allocmem !fir.array<?xf32>, %[[dim]]#1
-! CHECK-NOT:  fir.array_merge_store
+! CHECK-NOT:  fir.call @_FortranAAssign
 ! CHECK: %[[not_contiguous:.*]] = arith.cmpi eq, %[[is_contiguous]], %false{{.*}} : i1
 ! CHECK:  %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:  fir.call @_QPbar_intent_out(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
   call bar_intent_out(x)
   
 ! CHECK: fir.if %[[not_contiguous]]
-! CHECK: fir.array_merge_store %{{.*}}, %{{.*}} to %[[x]]
+! CHECK: fir.call @_FortranAAssign
 ! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
@@ -153,13 +144,17 @@ subroutine bar_intent_in(x)
 ! CHECK: } else {
 ! CHECK: %[[dim:.*]]:3 = fir.box_dims %[[x]], %c0{{.*}} : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 ! CHECK: %[[temp:.*]] = fir.allocmem !fir.array<?xf32>, %[[dim]]#1
-! CHECK:  fir.array_merge_store %{{.*}}, %{{.*}} to %[[temp]]
+! CHECK: %[[temp_shape:.*]] = fir.shape %[[dim]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[temp_box:.*]] = fir.embox %[[temp]](%[[temp_shape]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+! CHECK: fir.store %[[temp_box]] to %[[temp_box_loc:.*]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+! CHECK: %[[temp_box_addr:.*]] = fir.convert %[[temp_box_loc]] : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<none>>
+! CHECK: fir.call @_FortranAAssign(%[[temp_box_addr]],
 ! CHECK: %[[not_contiguous:.*]] = arith.cmpi eq, %[[is_contiguous]], %false{{.*}} : i1
 ! CHECK:  %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:  fir.call @_QPbar_intent_in(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
   call bar_intent_in(x)
 ! CHECK: fir.if %[[not_contiguous]]
-! CHECK-NOT:  fir.array_merge_store
+! CHECK-NOT:  fir.call @_FortranAAssign
 ! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
@@ -180,13 +175,13 @@ subroutine bar_intent_inout(x)
 ! CHECK: } else {
 ! CHECK: %[[dim:.*]]:3 = fir.box_dims %[[x]], %c0{{.*}} : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 ! CHECK: %[[temp:.*]] = fir.allocmem !fir.array<?xf32>, %[[dim]]#1
-! CHECK:  fir.array_merge_store %{{.*}}, %{{.*}} to %[[temp]]
+! CHECK:  fir.call @_FortranAAssign
 ! CHECK: %[[not_contiguous:.*]] = arith.cmpi eq, %[[is_contiguous]], %false{{.*}} : i1
 ! CHECK:  %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:  fir.call @_QPbar_intent_inout(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
   call bar_intent_inout(x)
 ! CHECK: fir.if %[[not_contiguous]]
-! CHECK:  fir.array_merge_store %{{.*}}, %{{.*}} to %[[x]]
+! CHECK:  fir.call @_FortranAAssign
 ! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
@@ -195,64 +190,49 @@ subroutine bar_intent_inout(x)
 ! CHECK-LABEL: func @_QPtest_char(
 ! CHECK-SAME:    %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.char<1,10>>>{{.*}}) {
 subroutine test_char(x)
-  ! CHECK: %[[VAL_1:.*]] = arith.constant 10 : index
-  ! CHECK: %[[box_none:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.box<none>
-  ! CHECK: %[[is_contiguous:.*]] = fir.call @_FortranAIsContiguous(%[[box_none]]) {{.*}}: (!fir.box<none>) -> i1
-  ! CHECK: %[[addr:.*]] = fir.if %[[is_contiguous]]
-  ! CHECK: } else {
-  ! CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
-  ! CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index) -> (index, index, index)
-  ! CHECK: %[[VAL_4:.*]] = fir.allocmem !fir.array<?x!fir.char<1,10>>, %[[VAL_3]]#1 {uniq_name = ".copyinout"}
-  ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1>
-  ! CHECK: %[[VAL_6:.*]] = fir.array_load %[[VAL_4]](%[[VAL_5]]) : (!fir.heap<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.array<?x!fir.char<1,10>>
-  ! CHECK: %[[VAL_7:.*]] = fir.array_load %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.array<?x!fir.char<1,10>>
-  ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
-  ! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
-  ! CHECK: %[[VAL_10:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_8]] : index
-  ! CHECK: %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_8]] unordered iter_args(%[[VAL_13:.*]] = %[[VAL_6]]) -> (!fir.array<?x!fir.char<1,10>>) {
-  ! CHECK: %[[VAL_14:.*]] = fir.array_access %[[VAL_7]], %[[VAL_12]] : (!fir.array<?x!fir.char<1,10>>, index) -> !fir.ref<!fir.char<1,10>>
-  ! CHECK: %[[VAL_15:.*]] = fir.array_access %[[VAL_13]], %[[VAL_12]] : (!fir.array<?x!fir.char<1,10>>, index) -> !fir.ref<!fir.char<1,10>>
-  ! CHECK: %[[VAL_16:.*]] = arith.constant 10 : index
-  ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i64
-  ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (index) -> i64
-  ! CHECK: %[[VAL_19:.*]] = arith.muli %[[VAL_17]], %[[VAL_18]] : i64
-  ! CHECK: %[[VAL_20:.*]] = arith.constant false
-  ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_15]] : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
-  ! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_14]] : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
-  ! CHECK: fir.call @llvm.memmove.p0.p0.i64(%[[VAL_21]], %[[VAL_22]], %[[VAL_19]], %[[VAL_20]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64, i1) -> ()
-  ! CHECK: %[[VAL_23:.*]] = fir.array_amend %[[VAL_13]], %[[VAL_15]] : (!fir.array<?x!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>) -> !fir.array<?x!fir.char<1,10>>
-  ! CHECK: fir.result %[[VAL_23]] : !fir.array<?x!fir.char<1,10>>
-  ! CHECK: }
-  ! CHECK: fir.array_merge_store %[[VAL_6]], %[[VAL_24:.*]] to %[[VAL_4]] : !fir.array<?x!fir.char<1,10>>, !fir.array<?x!fir.char<1,10>>, !fir.heap<!fir.array<?x!fir.char<1,10>>>
-  ! CHECK: %[[dim:.*]]:3 = fir.box_dims %[[VAL_0]], %c0{{.*}} : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index) -> (index, index, index)
-  ! CHECK: %[[VAL_25:.*]] = fir.convert %[[addr]] : (!fir.heap<!fir.array<?x!fir.char<1,10>>>) -> !fir.ref<!fir.char<1,?>>
-  ! CHECK: %[[VAL_26:.*]] = fir.emboxchar %[[VAL_25]], %[[VAL_1]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
-  ! CHECK: fir.call @_QPbar_char(%[[VAL_26]]) {{.*}}: (!fir.boxchar<1>) -> ()
-  ! CHECK: %[[VAL_27:.*]] = fir.array_load %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.array<?x!fir.char<1,10>>
-  ! CHECK: %[[VAL_28:.*]] = arith.constant 0 : index
-  ! CHECK: %[[VAL_29:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_28]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index) -> (index, index, index)
-  ! CHECK: %[[VAL_30:.*]] = fir.shape %[[dim]]#1 : (index) -> !fir.shape<1>
-  ! CHECK: %[[VAL_31:.*]] = fir.array_load %[[addr]](%[[VAL_30]]) : (!fir.heap<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.array<?x!fir.char<1,10>>
-  ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : index
-  ! CHECK: %[[VAL_33:.*]] = arith.constant 0 : index
-  ! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_29]]#1, %[[VAL_32]] : index
-  ! CHECK: %[[VAL_35:.*]] = fir.do_loop %[[VAL_36:.*]] = %[[VAL_33]] to %[[VAL_34]] step %[[VAL_32]] unordered iter_args(%[[VAL_37:.*]] = %[[VAL_27]]) -> (!fir.array<?x!fir.char<1,10>>) {
-  ! CHECK: %[[VAL_38:.*]] = fir.array_access %[[VAL_31]], %[[VAL_36]] : (!fir.array<?x!fir.char<1,10>>, index) -> !fir.ref<!fir.char<1,10>>
-  ! CHECK: %[[VAL_39:.*]] = fir.array_access %[[VAL_37]], %[[VAL_36]] : (!fir.array<?x!fir.char<1,10>>, index) -> !fir.ref<!fir.char<1,10>>
-  ! CHECK: %[[VAL_40:.*]] = arith.constant 10 : index
-  ! CHECK: %[[VAL_41:.*]] = arith.constant 1 : i64
-  ! CHECK: %[[VAL_42:.*]] = fir.convert %[[VAL_40]] : (index) -> i64
-  ! CHECK: %[[VAL_43:.*]] = arith.muli %[[VAL_41]], %[[VAL_42]] : i64
-  ! CHECK: %[[VAL_44:.*]] = arith.constant false
-  ! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_39]] : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
-  ! CHECK: %[[VAL_46:.*]] = fir.convert %[[VAL_38]] : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
-  ! CHECK: fir.call @llvm.memmove.p0.p0.i64(%[[VAL_45]], %[[VAL_46]], %[[VAL_43]], %[[VAL_44]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64, i1) -> ()
-  ! CHECK: %[[VAL_47:.*]] = fir.array_amend %[[VAL_37]], %[[VAL_39]] : (!fir.array<?x!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>) -> !fir.array<?x!fir.char<1,10>>
-  ! CHECK: fir.result %[[VAL_47]] : !fir.array<?x!fir.char<1,10>>
-  ! CHECK: }
-  ! CHECK: fir.array_merge_store %[[VAL_27]], %[[VAL_48:.*]] to %[[VAL_0]] : !fir.array<?x!fir.char<1,10>>, !fir.array<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
-  ! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?x!fir.char<1,10>>>
-
+  ! CHECK:         %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:         %[[VAL_2:.*]] = fir.alloca !fir.box<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:         %[[VAL_3:.*]] = arith.constant 10 : index
+  ! CHECK:         %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.box<none>
+  ! CHECK:         %[[VAL_5:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_4]]) fastmath<contract> : (!fir.box<none>) -> i1
+  ! CHECK:         %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.heap<!fir.array<?x!fir.char<1,10>>>) {
+  ! CHECK:           %[[VAL_7:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.heap<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:           fir.result %[[VAL_7]] : !fir.heap<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:         } else {
+  ! CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+  ! CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index) -> (index, index, index)
+  ! CHECK:           %[[VAL_10:.*]] = fir.allocmem !fir.array<?x!fir.char<1,10>>, %[[VAL_9]]#1 {uniq_name = ".copyinout"}
+  ! CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_9]]#1 : (index) -> !fir.shape<1>
+  ! CHECK:           %[[VAL_12:.*]] = fir.embox %[[VAL_10]](%[[VAL_11]]) : (!fir.heap<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:           fir.store %[[VAL_12]] to %[[VAL_2]] : !fir.ref<!fir.box<!fir.array<?x!fir.char<1,10>>>>
+  ! CHECK:           %[[VAL_13:.*]] = fir.address_of(@_QQcl.{{.*}}) : !fir.ref<!fir.char<1,{{.*}}>>
+  ! CHECK:           %[[VAL_14:.*]] = arith.constant {{.*}} : i32
+  ! CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.box<!fir.array<?x!fir.char<1,10>>>>) -> !fir.ref<!fir.box<none>>
+  ! CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.box<none>
+  ! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_13]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+  ! CHECK:           %[[VAL_18:.*]] = fir.call @_FortranAAssign(%[[VAL_15]], %[[VAL_16]], %[[VAL_17]], %[[VAL_14]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+  ! CHECK:           fir.result %[[VAL_10]] : !fir.heap<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:         }
+  ! CHECK:         %[[VAL_19:.*]] = arith.constant 0 : index
+  ! CHECK:         %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_19]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index) -> (index, index, index)
+  ! CHECK:         %[[VAL_21:.*]] = arith.constant false
+  ! CHECK:         %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_21]] : i1
+  ! CHECK:         %[[VAL_23:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.heap<!fir.array<?x!fir.char<1,10>>>) -> !fir.ref<!fir.char<1,?>>
+  ! CHECK:         %[[VAL_25:.*]] = fir.emboxchar %[[VAL_23]], %[[VAL_3]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+  ! CHECK:         fir.call @_QPbar_char(%[[VAL_25]]) fastmath<contract> : (!fir.boxchar<1>) -> ()
+  ! CHECK:         fir.if %[[VAL_22]] {
+  ! CHECK:           %[[VAL_26:.*]] = fir.shape %[[VAL_20]]#1 : (index) -> !fir.shape<1>
+  ! CHECK:           %[[VAL_27:.*]] = fir.embox %[[VAL_24]](%[[VAL_26]]) : (!fir.heap<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:           fir.store %[[VAL_0]] to %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?x!fir.char<1,10>>>>
+  ! CHECK:           %[[VAL_28:.*]] = fir.address_of(@_QQcl.{{.*}}) : !fir.ref<!fir.char<1,{{.*}}>>
+  ! CHECK:           %[[VAL_29:.*]] = arith.constant {{.*}} : i32
+  ! CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.array<?x!fir.char<1,10>>>>) -> !fir.ref<!fir.box<none>>
+  ! CHECK:           %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.box<none>
+  ! CHECK:           %[[VAL_32:.*]] = fir.convert %[[VAL_28]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+  ! CHECK:           %[[VAL_33:.*]] = fir.call @_FortranAAssign(%[[VAL_30]], %[[VAL_31]], %[[VAL_32]], %[[VAL_29]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+  ! CHECK:           fir.freemem %[[VAL_24]] : !fir.heap<!fir.array<?x!fir.char<1,10>>>
+  ! CHECK:         }
+  
   character(10) :: x(:)
   call bar_char(x)
   ! CHECK:         return

diff  --git a/flang/test/Lower/dummy-argument-assumed-shape-optional.f90 b/flang/test/Lower/dummy-argument-assumed-shape-optional.f90
index d033e903ef4f9..01f774f1713f1 100644
--- a/flang/test/Lower/dummy-argument-assumed-shape-optional.f90
+++ b/flang/test/Lower/dummy-argument-assumed-shape-optional.f90
@@ -29,9 +29,7 @@ subroutine test_assumed_shape_to_contiguous(x)
 ! CHECK:    fir.result %[[VAL_4]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  } else {
 ! CHECK:    %[[VAL_7:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:    fir.do_loop {{.*}} {
-              ! ... copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.result %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  %[[VAL_20:.*]] = arith.constant 0 : index
@@ -42,9 +40,7 @@ subroutine test_assumed_shape_to_contiguous(x)
 ! CHECK:  %[[VAL_25:.*]] = fir.embox %[[VAL_3]](%[[VAL_24]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous(%[[VAL_25]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_23]] {
-! CHECK:    fir.do_loop {{.*}} {
-              ! ... copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_3]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
@@ -78,9 +74,7 @@ subroutine test_assumed_shape_opt_to_contiguous(x)
 ! CHECK:    fir.result %[[VAL_4]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  } else {
 ! CHECK:    %[[VAL_7:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:    fir.do_loop {{.*}} {
-              ! ... copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.result %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  %[[VAL_20:.*]] = arith.constant 0 : index
@@ -91,9 +85,7 @@ subroutine test_assumed_shape_opt_to_contiguous(x)
 ! CHECK:  %[[VAL_25:.*]] = fir.embox %[[VAL_3]](%[[VAL_24]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous(%[[VAL_25]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_23]] {
-! CHECK:    fir.do_loop {{.*}} {
-              ! ... copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_3]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
@@ -128,9 +120,7 @@ subroutine test_assumed_shape_to_contiguous_opt(x)
 ! CHECK:    fir.result %[[VAL_4]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  } else {
 ! CHECK:    %[[VAL_7:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:    fir.do_loop {{.*}} {
-              ! ... copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.result %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  %[[VAL_20:.*]] = arith.constant 0 : index
@@ -141,9 +131,7 @@ subroutine test_assumed_shape_to_contiguous_opt(x)
 ! CHECK:  %[[VAL_25:.*]] = fir.embox %[[VAL_3]](%[[VAL_24]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_25]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_23]] {
-! CHECK:    fir.do_loop {{.*}} {
-              ! ... copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_3]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
@@ -184,9 +172,7 @@ subroutine test_assumed_shape_opt_to_contiguous_opt(x)
 ! CHECK:      fir.result %[[VAL_11]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:    } else {
 ! CHECK:      %[[VAL_14:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:      fir.do_loop {{.*}} {
-                ! copy ...
-! CHECK:      }
+! CHECK:      fir.call @_FortranAAssign
 ! CHECK:      fir.result %[[VAL_14]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:    }
 ! CHECK:    fir.result %[[VAL_10]] : !fir.heap<!fir.array<?xf32>>
@@ -205,9 +191,7 @@ subroutine test_assumed_shape_opt_to_contiguous_opt(x)
 ! CHECK:  %[[VAL_38:.*]] = arith.select %[[VAL_1]], %[[VAL_35]], %[[VAL_37]] : !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_38]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_33]] {
-! CHECK:    %[[VAL_47:.*]] = fir.do_loop {{.*}} {
-              ! copy ...
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
@@ -251,9 +235,7 @@ subroutine test_pointer_to_contiguous_opt(x)
 ! CHECK:      fir.result %[[VAL_13]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:    } else {
 ! CHECK:      %[[VAL_16:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:      fir.do_loop {{.*}} {
-                ! copy
-! CHECK:      }
+! CHECK:      fir.call @_FortranAAssign
 ! CHECK:      fir.result %[[VAL_16]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:    }
 ! CHECK:    fir.result %[[VAL_12]] : !fir.heap<!fir.array<?xf32>>
@@ -272,9 +254,7 @@ subroutine test_pointer_to_contiguous_opt(x)
 ! CHECK:  %[[VAL_41:.*]] = arith.select %[[VAL_5]], %[[VAL_38]], %[[VAL_40]] : !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_41]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_36]] {
-! CHECK:    fir.do_loop {{.*}} {
-              ! copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_11]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
@@ -324,9 +304,7 @@ subroutine test_pointer_opt_to_contiguous_opt(x)
 ! CHECK:      fir.result %[[VAL_13]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:    } else {
 ! CHECK:      %[[VAL_16:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:      fir.do_loop {{.*}} {
-                ! copy
-! CHECK:      }
+! CHECK:      fir.call @_FortranAAssign
 ! CHECK:      fir.result %[[VAL_16]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:    }
 ! CHECK:    fir.result %[[VAL_12]] : !fir.heap<!fir.array<?xf32>>
@@ -345,9 +323,7 @@ subroutine test_pointer_opt_to_contiguous_opt(x)
 ! CHECK:  %[[VAL_41:.*]] = arith.select %[[VAL_5]], %[[VAL_38]], %[[VAL_40]] : !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_41]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_36]] {
-! CHECK:    fir.do_loop {{.*}} {
-              ! copy
-! CHECK:    }
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_11]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return

diff  --git a/flang/test/Lower/dummy-argument-optional-2.f90 b/flang/test/Lower/dummy-argument-optional-2.f90
index 9c0e9e3a10eed..ab43fd9167797 100644
--- a/flang/test/Lower/dummy-argument-optional-2.f90
+++ b/flang/test/Lower/dummy-argument-optional-2.f90
@@ -111,9 +111,7 @@ subroutine pass_pointer_array(i)
 ! CHECK:           %[[VAL_10:.*]] = arith.constant 0 : index
 ! CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[box]], %[[VAL_10]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> (index, index, index)
 ! CHECK:           %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]]#1 {uniq_name = ".copyinout"}
-! CHECK:           %[[VAL_20:.*]] = fir.do_loop {{.*}} {
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_20]] to %[[VAL_12]] : !fir.array<?xf32>, !fir.array<?xf32>, !fir.heap<!fir.array<?xf32>>
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_12]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_26:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
@@ -124,9 +122,7 @@ subroutine pass_pointer_array(i)
 ! CHECK:         %[[VAL_29:.*]] = fir.convert %[[VAL_9]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape(%[[VAL_29]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK:           %[[VAL_40:.*]] = fir.do_loop {{.*}} {
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_40]] to %[[box]] : !fir.array<?xf32>, !fir.array<?xf32>, !fir.box<!fir.ptr<!fir.array<?xf32>>>
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
@@ -149,9 +145,7 @@ subroutine pass_pointer_array_char(c)
 ! CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_10]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>, index) -> (index, index, index)
 ! CHECK:           %[[VAL_12:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>) -> index
 ! CHECK:           %[[VAL_13:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_12]] : index), %[[VAL_11]]#1 {uniq_name = ".copyinout"}
-! CHECK:           %[[VAL_21:.*]] = fir.do_loop {{.*}} {
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_21]] to %[[VAL_13]] typeparams %[[VAL_12]] : !fir.array<?x!fir.char<1,?>>, !fir.array<?x!fir.char<1,?>>, !fir.heap<!fir.array<?x!fir.char<1,?>>>, index
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_13]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_46:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.char<1,?>>>
@@ -164,9 +158,7 @@ subroutine pass_pointer_array_char(c)
 ! CHECK:         %[[VAL_52:.*]] = fir.emboxchar %[[VAL_50]], %[[VAL_47]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_char(%[[VAL_52]]) {{.*}}: (!fir.boxchar<1>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK:           %[[VAL_62:.*]] = fir.do_loop {{.*}} {
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_62]] to %[[VAL_6]] : !fir.array<?x!fir.char<1,?>>, !fir.array<?x!fir.char<1,?>>, !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:         }
 ! CHECK:         return
@@ -190,8 +182,7 @@ subroutine forward_pointer_array()
 ! CHECK:         %[[is_contiguous:.*]] = fir.call @_FortranAIsContiguous(%{{.*}}) {{.*}}: (!fir.box<none>) -> i1
 ! CHECK:         %[[VAL_7:.*]] = fir.if %[[VAL_6]] -> (!fir.heap<!fir.array<?xf32>>) {
 ! CHECK:           %[[VAL_10:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:           fir.do_loop {{.*}} {
-! CHECK:           }
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_10]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_11:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
@@ -202,8 +193,7 @@ subroutine forward_pointer_array()
 ! CHECK:         %[[VAL_14:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape(%[[VAL_14]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK:           fir.do_loop {{.*}} {
-! CHECK:           }
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
@@ -231,9 +221,7 @@ subroutine pass_opt_assumed_shape(x)
 ! CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
 ! CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_8]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 ! CHECK:           %[[VAL_10:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_9]]#1 {uniq_name = ".copyinout"}
-! CHECK:           %[[VAL_17:.*]] = fir.do_loop {{.*}} {
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_17]] to %[[VAL_10]] : !fir.array<?xf32>, !fir.array<?xf32>, !fir.heap<!fir.array<?xf32>>
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_10]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_23:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
@@ -244,9 +232,7 @@ subroutine pass_opt_assumed_shape(x)
 ! CHECK:         %[[VAL_26:.*]] = fir.convert %[[VAL_27:.*]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape(%[[VAL_26]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK:           %[[VAL_36:.*]] = fir.do_loop {{.*}} { 
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_36]] to %[[VAL_6]] : !fir.array<?xf32>, !fir.array<?xf32>, !fir.box<!fir.array<?xf32>>
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_27]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
@@ -272,9 +258,7 @@ subroutine pass_opt_assumed_shape_char(c)
 ! CHECK:         } else {
 ! CHECK:           %[[box_elesize:.*]] = fir.box_elesize %[[VAL_7]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
 ! CHECK:           %[[temp:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[box_elesize]] : index), %{{.*}}#1 {uniq_name = ".copyinout"}
-! CHECK:           %[[VAL_19:.*]] = fir.do_loop  {{.*}} {
-! CHECK:           }
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_19]] to %[[temp]] typeparams %[[box_elesize]] : !fir.array<?x!fir.char<1,?>>, !fir.array<?x!fir.char<1,?>>, !fir.heap<!fir.array<?x!fir.char<1,?>>>, index
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_12]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_44:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.char<1,?>>>
@@ -287,8 +271,7 @@ subroutine pass_opt_assumed_shape_char(c)
 ! CHECK:         %[[VAL_50:.*]] = fir.emboxchar %[[VAL_48]], %[[VAL_45]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_char(%[[VAL_50]]) {{.*}}: (!fir.boxchar<1>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK:           %[[VAL_59:.*]] = fir.do_loop {{.*}} {
-! CHECK:           fir.array_merge_store %{{.*}}, %[[VAL_59]] to %[[VAL_7]] : !fir.array<?x!fir.char<1,?>>, !fir.array<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_49]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:         }
 end subroutine
@@ -411,8 +394,7 @@ subroutine pass_opt_assumed_shape_to_intentin(x)
 ! CHECK:         %[[is_contiguous:.*]] = fir.call @_FortranAIsContiguous(%[[box_none]]) {{.*}}: (!fir.box<none>) -> i1
 ! CHECK:         %[[VAL_7:.*]] = fir.if %[[VAL_1]] -> (!fir.heap<!fir.array<?xf32>>) {
 ! CHECK:           %[[VAL_10:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK:           fir.do_loop {{.*}} {
-! CHECK:           }
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_10]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_23:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
@@ -423,7 +405,7 @@ subroutine pass_opt_assumed_shape_to_intentin(x)
 ! CHECK:         %[[VAL_24:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_intentin(%[[VAL_24]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK-NOT:       fir.do_loop
+! CHECK-NOT:       fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
@@ -443,7 +425,7 @@ subroutine pass_opt_assumed_shape_to_intentout(x)
 ! CHECK:         %[[is_contiguous:.*]] = fir.call @_FortranAIsContiguous(%[[box_none]]) {{.*}}: (!fir.box<none>) -> i1
 ! CHECK:         %[[VAL_7:.*]] = fir.if %[[VAL_1]] -> (!fir.heap<!fir.array<?xf32>>) {
 ! CHECK:           %[[VAL_10:.*]] = fir.allocmem !fir.array<?xf32>
-! CHECK-NOT:       fir.do_loop
+! CHECK-NOT:       fir.call @_FortranAAssign
 ! CHECK:           fir.result %[[VAL_10]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         } else {
 ! CHECK:           %[[VAL_11:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
@@ -454,8 +436,7 @@ subroutine pass_opt_assumed_shape_to_intentout(x)
 ! CHECK:         %[[VAL_14:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_intentout(%[[VAL_14]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK:           fir.do_loop {{.*}} {
-! CHECK:           }
+! CHECK:           fir.call @_FortranAAssign
 ! CHECK:           fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine

diff  --git a/flang/test/Lower/optional-value-caller.f90 b/flang/test/Lower/optional-value-caller.f90
index 089a85b5892ca..b4df3415b7825 100644
--- a/flang/test/Lower/optional-value-caller.f90
+++ b/flang/test/Lower/optional-value-caller.f90
@@ -287,9 +287,7 @@ subroutine test_dyn_array_from_assumed(i, n)
 ! CHECK:    %[[VAL_9:.*]] = arith.constant 0 : index
 ! CHECK:    %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_7]], %[[VAL_9]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 ! CHECK:    %[[VAL_11:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_10]]#1 {uniq_name = ".copy"}
-! CHECK:    %[[VAL_18:.*]] = fir.do_loop
-! CHECK:    }
-! CHECK:    fir.array_merge_store %{{.*}}, %[[VAL_18]] to %[[VAL_11]] : !fir.array<?xi32>, !fir.array<?xi32>, !fir.heap<!fir.array<?xi32>>
+! CHECK:    fir.call @_FortranAAssign
 ! CHECK:    fir.result %[[VAL_11]] : !fir.heap<!fir.array<?xi32>>
 ! CHECK:  } else {
 ! CHECK:    %[[VAL_24:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
@@ -300,6 +298,7 @@ subroutine test_dyn_array_from_assumed(i, n)
 ! CHECK:  %[[VAL_25:.*]] = fir.convert %[[VAL_8]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 ! CHECK:  fir.call @_QPdyn_array(%[[VAL_25]], %[[VAL_1]]) {{.*}}: (!fir.ref<!fir.array<?xi32>>, !fir.ref<i64>) -> ()
 ! CHECK:  fir.if %[[and]] {
+! CHECK-NOT: fir.call @_FortranAAssign
 ! CHECK:    fir.freemem %[[VAL_8]] : !fir.heap<!fir.array<?xi32>>
 ! CHECK:  }
 end subroutine
@@ -309,34 +308,49 @@ subroutine test_dyn_array_from_assumed(i, n)
 subroutine test_array_ptr(i)
   integer, pointer :: i(:)
   call array(i)
-! CHECK:  %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
-! CHECK:  %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
-! CHECK:  %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr<!fir.array<?xi32>>) -> i64
-! CHECK:  %[[VAL_4:.*]] = arith.constant 0 : i64
-! CHECK:  %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
-! CHECK:  %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
-! CHECK:  %[[VAL_7:.*]] = arith.constant 0 : index
-! CHECK:  %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_7]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
-! CHECK:  %[[box_none:.*]] = fir.convert %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.box<none>
-! CHECK:  %[[is_contiguous:.*]] = fir.call @_FortranAIsContiguous(%[[box_none]]) {{.*}}: (!fir.box<none>) -> i1
-! CHECK:  %[[VAL_9:.*]] = fir.if %[[VAL_5]] -> (!fir.heap<!fir.array<?xi32>>) {
-! CHECK:    %[[VAL_10:.*]] = arith.constant 0 : index
-! CHECK:    %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_10]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
-! CHECK:    %[[VAL_12:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_11]]#1 {uniq_name = ".copy"}
-! CHECK:    %[[VAL_20:.*]] = fir.do_loop
-! CHECK:    }
-! CHECK:    fir.array_merge_store %{{.*}}, %[[VAL_20]] to %[[VAL_12]] : !fir.array<?xi32>, !fir.array<?xi32>, !fir.heap<!fir.array<?xi32>>
-! CHECK:    fir.result %[[VAL_12]] : !fir.heap<!fir.array<?xi32>>
-! CHECK:  } else {
-! CHECK:    %[[VAL_26:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
-! CHECK:    fir.result %[[VAL_26]] : !fir.heap<!fir.array<?xi32>>
-! CHECK:  }
-! CHECK:  %[[not_contiguous:.*]] = arith.cmpi eq, %[[is_contiguous]], %false{{.*}} : i1
-! CHECK:  %[[and:.*]] = arith.andi %[[VAL_5]], %[[not_contiguous]] : i1
-! CHECK:  %[[VAL_27:.*]] = fir.convert %[[VAL_9]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<100xi32>>
-! CHECK:  fir.if %[[and]] {
-! CHECK:    fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?xi32>>
-! CHECK:  }
+! CHECK:         %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+! CHECK:         %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK:         %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
+! CHECK:         %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ptr<!fir.array<?xi32>>) -> i64
+! CHECK:         %[[VAL_5:.*]] = arith.constant 0 : i64
+! CHECK:         %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_5]] : i64
+! CHECK:         %[[VAL_7:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK:         %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.box<none>
+! CHECK:         %[[VAL_9:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_8]]) fastmath<contract> : (!fir.box<none>) -> i1
+! CHECK:         %[[VAL_10:.*]] = fir.if %[[VAL_6]] -> (!fir.heap<!fir.array<?xi32>>) {
+! CHECK:           %[[VAL_11:.*]] = fir.if %[[VAL_9]] -> (!fir.heap<!fir.array<?xi32>>) {
+! CHECK:             %[[VAL_12:.*]] = fir.box_addr %[[VAL_7]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:             fir.result %[[VAL_12]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:           } else {
+! CHECK:             %[[VAL_13:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_7]], %[[VAL_13]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_14]]#1 {uniq_name = ".copy"}
+! CHECK:             %[[VAL_16:.*]] = fir.shape %[[VAL_14]]#1 : (index) -> !fir.shape<1>
+! CHECK:             %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+! CHECK:             fir.store %[[VAL_17]] to %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:             %[[VAL_18:.*]] = fir.address_of(@_QQcl.{{.*}}) : !fir.ref<!fir.char<1,{{.*}}>>
+! CHECK:             %[[VAL_19:.*]] = arith.constant {{.*}} : i32
+! CHECK:             %[[VAL_20:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<none>>
+! CHECK:             %[[VAL_21:.*]] = fir.convert %[[VAL_7]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.box<none>
+! CHECK:             %[[VAL_22:.*]] = fir.convert %[[VAL_18]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+! CHECK:             %[[VAL_23:.*]] = fir.call @_FortranAAssign(%[[VAL_20]], %[[VAL_21]], %[[VAL_22]], %[[VAL_19]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK:             fir.result %[[VAL_15]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:           }
+! CHECK:           fir.result %[[VAL_24:.*]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:         } else {
+! CHECK:           %[[VAL_25:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+! CHECK:           fir.result %[[VAL_25]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:         }
+! CHECK:         %[[VAL_26:.*]] = arith.constant false
+! CHECK:         %[[VAL_27:.*]] = arith.cmpi eq, %[[VAL_9]], %[[VAL_26]] : i1
+! CHECK:         %[[VAL_28:.*]] = arith.andi %[[VAL_6]], %[[VAL_27]] : i1
+! CHECK:         %[[VAL_29:.*]] = fir.convert %[[VAL_30:.*]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<100xi32>>
+! CHECK:         fir.call @_QParray(%[[VAL_29]]) fastmath<contract> : (!fir.ref<!fir.array<100xi32>>) -> ()
+! CHECK:         fir.if %[[VAL_28]] {
+! CHECK:           fir.freemem %[[VAL_30]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:         }
+! CHECK:         return
+! CHECK:       }
 end subroutine
 
 ! CHECK-LABEL: func @_QMtestPtest_char(
@@ -397,38 +411,53 @@ subroutine test_char_array(c)
   integer(8) :: n
   character(*), optional :: c(:)
   call dyn_char_array(c, n)
-! CHECK:  %[[VAL_1:.*]] = fir.alloca i64 {bindc_name = "n", uniq_name = "_QMtestFtest_char_arrayEn"}
-! CHECK:  %[[VAL_2:.*]] = fir.is_present %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
-! CHECK:  %[[VAL_3:.*]] = fir.zero_bits !fir.ref<!fir.array<?x!fir.char<1,?>>>
-! CHECK:  %[[VAL_4:.*]] = arith.constant 0 : index
-! CHECK:  %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
-! CHECK:  %[[VAL_6:.*]] = arith.constant 0 : index
-! CHECK:  %[[VAL_7:.*]] = fir.embox %[[VAL_3]](%[[VAL_5]]) typeparams %[[VAL_6]] : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
-! CHECK:  %[[VAL_8:.*]] = arith.select %[[VAL_2]], %[[VAL_0]], %[[VAL_7]] : !fir.box<!fir.array<?x!fir.char<1,?>>>
-! CHECK:  %[[box_none:.*]] = fir.convert %5 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.box<none>
-! CHECK:  %[[is_contiguous:.*]] = fir.call @_FortranAIsContiguous(%[[box_none]]) {{.*}}: (!fir.box<none>) -> i1
-! CHECK:  %[[VAL_9:.*]] = fir.if %[[VAL_2]] -> (!fir.heap<!fir.array<?x!fir.char<1,?>>>) {
-! CHECK:    %[[VAL_10:.*]] = arith.constant 0 : index
-! CHECK:    %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_8]], %[[VAL_10]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index)
-! CHECK:    %[[VAL_12:.*]] = fir.box_elesize %[[VAL_8]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
-! CHECK:    %[[VAL_13:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_12]] : index), %[[VAL_11]]#1 {uniq_name = ".copy"}
-! CHECK:    %[[VAL_20:.*]] = fir.do_loop {{.*}}
-! CHECK:      fir.call @llvm.memmove.p0.p0.i64
-! CHECK:    }
-! CHECK:    fir.array_merge_store %{{.*}}, %[[VAL_20]] to %[[VAL_13]] typeparams %[[VAL_12]] : !fir.array<?x!fir.char<1,?>>, !fir.array<?x!fir.char<1,?>>, !fir.heap<!fir.array<?x!fir.char<1,?>>>, index
-! CHECK:    fir.result %[[VAL_13]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK:  } else {
-! CHECK:    %[[VAL_45:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK:    fir.result %[[VAL_45]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK:  }
-! CHECK:  %[[VAL_46:.*]] = fir.box_elesize %[[VAL_8]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
-! CHECK:  %[[not_contiguous:.*]] = arith.cmpi eq, %[[is_contiguous]], %false{{.*}} : i1
-! CHECK:  %[[and:.*]] = arith.andi %[[VAL_2]], %[[not_contiguous]] : i1
-! CHECK:  %[[VAL_47:.*]] = fir.convert %[[VAL_9]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>) -> !fir.ref<!fir.char<1,?>>
-! CHECK:  %[[VAL_49:.*]] = fir.emboxchar %[[VAL_47]], %[[VAL_46]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
-! CHECK:  fir.call @_QPdyn_char_array(%[[VAL_49]], %[[VAL_1]]) {{.*}}: (!fir.boxchar<1>, !fir.ref<i64>) -> ()
-! CHECK:  fir.if %[[and]] {
-! CHECK:    fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK:  }
+! CHECK:         %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         %[[VAL_2:.*]] = fir.alloca i64 {bindc_name = "n", uniq_name = "_QMtestFtest_char_arrayEn"}
+! CHECK:         %[[VAL_3:.*]] = fir.is_present %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+! CHECK:         %[[VAL_4:.*]] = fir.zero_bits !fir.ref<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         %[[VAL_5:.*]] = arith.constant 0 : index
+! CHECK:         %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:         %[[VAL_7:.*]] = arith.constant 0 : index
+! CHECK:         %[[VAL_8:.*]] = fir.embox %[[VAL_4]](%[[VAL_6]]) typeparams %[[VAL_7]] : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         %[[VAL_9:.*]] = arith.select %[[VAL_3]], %[[VAL_0]], %[[VAL_8]] : !fir.box<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.box<none>
+! CHECK:         %[[VAL_11:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_10]]) fastmath<contract> : (!fir.box<none>) -> i1
+! CHECK:         %[[VAL_12:.*]] = fir.if %[[VAL_3]] -> (!fir.heap<!fir.array<?x!fir.char<1,?>>>) {
+! CHECK:           %[[VAL_13:.*]] = fir.if %[[VAL_11]] -> (!fir.heap<!fir.array<?x!fir.char<1,?>>>) {
+! CHECK:             %[[VAL_14:.*]] = fir.box_addr %[[VAL_9]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:             fir.result %[[VAL_14]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:           } else {
+! CHECK:             %[[VAL_15:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_15]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_17:.*]] = fir.box_elesize %[[VAL_9]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
+! CHECK:             %[[VAL_18:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_17]] : index), %[[VAL_16]]#1 {uniq_name = ".copy"}
+! CHECK:             %[[VAL_19:.*]] = fir.shape %[[VAL_16]]#1 : (index) -> !fir.shape<1>
+! CHECK:             %[[VAL_20:.*]] = fir.embox %[[VAL_18]](%[[VAL_19]]) typeparams %[[VAL_17]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+! CHECK:             fir.store %[[VAL_20]] to %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?x!fir.char<1,?>>>>
+! CHECK:             %[[VAL_21:.*]] = fir.address_of(@_QQcl.{{.*}}) : !fir.ref<!fir.char<1,{{.*}}>>
+! CHECK:             %[[VAL_22:.*]] = arith.constant {{.*}} : i32
+! CHECK:             %[[VAL_23:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.array<?x!fir.char<1,?>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK:             %[[VAL_24:.*]] = fir.convert %[[VAL_9]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.box<none>
+! CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_21]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+! CHECK:             %[[VAL_26:.*]] = fir.call @_FortranAAssign(%[[VAL_23]], %[[VAL_24]], %[[VAL_25]], %[[VAL_22]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK:             fir.result %[[VAL_18]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:           }
+! CHECK:           fir.result %[[VAL_27:.*]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         } else {
+! CHECK:           %[[VAL_28:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:           fir.result %[[VAL_28]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         }
+! CHECK:         %[[VAL_29:.*]] = fir.box_elesize %[[VAL_9]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
+! CHECK:         %[[VAL_30:.*]] = arith.constant false
+! CHECK:         %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_11]], %[[VAL_30]] : i1
+! CHECK:         %[[VAL_32:.*]] = arith.andi %[[VAL_3]], %[[VAL_31]] : i1
+! CHECK:         %[[VAL_33:.*]] = fir.convert %[[VAL_34:.*]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>) -> !fir.ref<!fir.char<1,?>>
+! CHECK:         %[[VAL_35:.*]] = fir.emboxchar %[[VAL_33]], %[[VAL_29]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+! CHECK:         fir.call @_QPdyn_char_array(%[[VAL_35]], %[[VAL_2]]) fastmath<contract> : (!fir.boxchar<1>, !fir.ref<i64>) -> ()
+! CHECK:         fir.if %[[VAL_32]] {
+! CHECK:           fir.freemem %[[VAL_34]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:         }
+! CHECK:         return
+! CHECK:       }
 end subroutine
 end

diff  --git a/flang/test/Lower/parent-component.f90 b/flang/test/Lower/parent-component.f90
index ea7e7744000e8..88c7df000050d 100644
--- a/flang/test/Lower/parent-component.f90
+++ b/flang/test/Lower/parent-component.f90
@@ -58,9 +58,7 @@ subroutine init_with_slice()
   ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box<none>) -> i1
   ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) {
   ! CHECK: } else {
-  ! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<2x!fir.type<_QFTp{a:i32}>>)
-  ! CHECK: %{{.*}} = fir.field_index a, !fir.type<_QFTp{a:i32}>
-  ! CHECK-NOT: %{{.*}} = fir.field_index b, !fir.type<_QFTp{a:i32}>
+  ! CHECK: fir.call @_FortranAAssign
   ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) -> !fir.ref<!fir.array<2x!fir.type<_QFTp{a:i32}>>>
   ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) -> ()
 
@@ -96,9 +94,7 @@ subroutine init_no_slice()
   ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box<none>) -> i1
   ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) {
   ! CHECK: } else {
-  ! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<2x!fir.type<_QFTp{a:i32}>>)
-  ! CHECK: %{{.*}} = fir.field_index a, !fir.type<_QFTp{a:i32}>
-  ! CHECK-NOT: %{{.*}} = fir.field_index b, !fir.type<_QFTp{a:i32}>
+  ! CHECK: fir.call @_FortranAAssign
   ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) -> !fir.ref<!fir.array<2x!fir.type<_QFTp{a:i32}>>>
   ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) -> ()
 
@@ -143,9 +139,7 @@ subroutine init_allocatable()
   ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box<none>) -> i1
   ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap<!fir.array<?x!fir.type<_QFTp{a:i32}>>>) {
   ! CHECK: } else {
-  ! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<?x!fir.type<_QFTp{a:i32}>>)
-  ! CHECK: %{{.*}} = fir.field_index a, !fir.type<_QFTp{a:i32}>
-  ! CHECK-NOT: %{{.*}} = fir.field_index b, !fir.type<_QFTp{a:i32}>
+  ! CHECK: fir.call @_FortranAAssign
   ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap<!fir.array<?x!fir.type<_QFTp{a:i32}>>>) -> !fir.ref<!fir.array<2x!fir.type<_QFTp{a:i32}>>>
   ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref<!fir.array<2x!fir.type<_QFTp{a:i32}>>>) -> ()
 
@@ -199,7 +193,7 @@ subroutine init_assumed(y)
   ! CHECK: %[[BOX:.*]] = fir.rebox %[[ARG0]] : (!fir.box<!fir.array<?x!fir.type<_QFTc{a:i32,b:i32}>>>) -> !fir.box<!fir.array<?x!fir.type<_QFTc{a:i32,b:i32}>>>
   ! CHECK: %[[FIELD:.*]] = fir.field_index a, !fir.type<_QFTc{a:i32,b:i32}>
   ! CHECK: %[[C0:.*]] = arith.constant 0 : index
-  ! CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %13, %[[C0]] : (!fir.box<!fir.array<?x!fir.type<_QFTc{a:i32,b:i32}>>>, index) -> (index, index, index)
+  ! CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[BOX]], %[[C0]] : (!fir.box<!fir.array<?x!fir.type<_QFTc{a:i32,b:i32}>>>, index) -> (index, index, index)
   ! CHECK: %[[C1:.*]] = arith.constant 1 : index
   ! CHECK: %[[SLICE:.*]] = fir.slice %[[C1]], %[[BOX_DIMS]]#1, %[[C1]] path %[[FIELD]] : (index, index, index, !fir.field) -> !fir.slice<1>
   ! CHECK: %[[REBOX:.*]] = fir.rebox %arg0 [%[[SLICE]]] : (!fir.box<!fir.array<?x!fir.type<_QFTc{a:i32,b:i32}>>>, !fir.slice<1>) -> !fir.box<!fir.array<?x!fir.type<_QFTp{a:i32}>>>


        


More information about the flang-commits mailing list