[flang-commits] [flang] 9f44d5d - [flang] Simplify copy-in copy-out runtime API (#95822)

via flang-commits flang-commits at lists.llvm.org
Tue Jun 18 03:04:08 PDT 2024


Author: jeanPerier
Date: 2024-06-18T12:04:04+02:00
New Revision: 9f44d5d9d0903adaa9deb35d33056202e5030cb3

URL: https://github.com/llvm/llvm-project/commit/9f44d5d9d0903adaa9deb35d33056202e5030cb3
DIFF: https://github.com/llvm/llvm-project/commit/9f44d5d9d0903adaa9deb35d33056202e5030cb3.diff

LOG: [flang] Simplify copy-in copy-out runtime API (#95822)

The runtime API for copy-in copy-out currently has an entry only
for the copy-out. This entry has a "skipToInit" boolean that is never set
to false by lowering, and it does not deal with the deallocation of the
temporary.

The generated code was a mix of inline code and runtime calls. This is not a big deal,
but it adds unneeded complexity to both the compiler and the generated code.
With assumed-rank, it is also more cumbersome to establish a
temporary descriptor.

Instead, this patch:
- Adds a CopyInAssign API that deals with establishing the temporary
descriptor and does the copy.
- Removes unused arg to CopyOutAssign, and pushes
destruction/deallocation responsibility inside it.

Note that these runtime APIs are still not responsible for deciding
whether copy-in and copy-out are needed. This is kept as a separate runtime call to
IsContiguous, which is easier to inline/replace by inline code with the
hope of removing the copy-in/out calls after user function inlining.
@vzakhari has already shown that always inlining the whole copy
increases Fortran compilation time due to loop optimization attempts for
loops that are known to have little optimization profitability (the
variable being copied from and to is not contiguous).

Added: 
    

Modified: 
    flang/include/flang/Optimizer/Builder/Runtime/Assign.h
    flang/include/flang/Optimizer/Dialect/FIRType.h
    flang/include/flang/Optimizer/HLFIR/HLFIROps.td
    flang/include/flang/Runtime/assign.h
    flang/lib/Lower/ConvertCall.cpp
    flang/lib/Lower/ConvertExpr.cpp
    flang/lib/Optimizer/Builder/Runtime/Assign.cpp
    flang/lib/Optimizer/Dialect/FIRType.cpp
    flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
    flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
    flang/runtime/assign.cpp
    flang/test/HLFIR/assumed-type-actual-args.f90
    flang/test/HLFIR/assumed_shape_with_value_keyword.f90
    flang/test/HLFIR/copy-in-out-codegen.fir
    flang/test/HLFIR/copy-in-out.fir
    flang/test/HLFIR/memory-effects.fir
    flang/test/Lower/HLFIR/call-sequence-associated-descriptors.f90
    flang/test/Lower/HLFIR/calls-assumed-shape.f90
    flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
    flang/test/Lower/HLFIR/calls-optional.f90
    flang/test/Lower/HLFIR/calls-poly-to-assumed-type.f90
    flang/test/Lower/HLFIR/poly_expr_for_nonpoly_dummy.f90
    flang/test/Lower/call-copy-in-out.f90
    flang/test/Lower/dummy-argument-assumed-shape-optional.f90
    flang/test/Lower/dummy-argument-optional-2.f90
    flang/test/Lower/optional-value-caller.f90

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Builder/Runtime/Assign.h b/flang/include/flang/Optimizer/Builder/Runtime/Assign.h
index 14d338b7093e8..52a6a1d8e5a02 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Assign.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Assign.h
@@ -56,18 +56,12 @@ void genAssignExplicitLengthCharacter(fir::FirOpBuilder &builder,
 void genAssignTemporary(fir::FirOpBuilder &builder, mlir::Location loc,
                         mlir::Value destBox, mlir::Value sourceBox);
 
-/// Generate runtime call to CopyOutAssign to assign \p sourceBox to
-/// \p destBox. This call implements the copy-out of a temporary
-/// (\p sourceBox) to the actual argument (\p destBox) passed to a procedure,
-/// after the procedure returns to the caller.
-/// If \p skipToInit is false, then \p destBox will be initialized before
-/// the assignment, otherwise, it is assumed to be already initialized.
-/// The runtime makes sure that there is no reallocation of the top-level
-/// entity represented by \p destBox. If reallocation is required
-/// for the components of \p destBox, then it is done without finalization.
+/// Generate runtime call to "CopyInAssign" runtime API.
+void genCopyInAssign(fir::FirOpBuilder &builder, mlir::Location loc,
+                     mlir::Value tempBoxAddr, mlir::Value varBoxAddr);
+/// Generate runtime call to "CopyOutAssign" runtime API.
 void genCopyOutAssign(fir::FirOpBuilder &builder, mlir::Location loc,
-                      mlir::Value destBox, mlir::Value sourceBox,
-                      bool skipToInit);
+                      mlir::Value varBoxAddr, mlir::Value tempBoxAddr);
 
 } // namespace fir::runtime
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_ASSIGN_H

diff  --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index 0aeb29a93d71e..3498a329ced30 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -41,6 +41,9 @@ class BaseBoxType : public mlir::Type {
 public:
   using mlir::Type::Type;
 
+  /// Box attributes.
+  enum class Attribute { None, Allocatable, Pointer };
+
   /// Returns the element type of this box type.
   mlir::Type getEleTy() const;
 
@@ -55,6 +58,9 @@ class BaseBoxType : public mlir::Type {
   BaseBoxType getBoxTypeWithNewShape(mlir::Type shapeMold) const;
   BaseBoxType getBoxTypeWithNewShape(int rank) const;
 
+  /// Return the same type, except for the attribute (fir.heap/fir.ptr).
+  BaseBoxType getBoxTypeWithNewAttr(Attribute attr) const;
+
   /// Methods for support type inquiry through isa, cast, and dyn_cast.
   static bool classof(mlir::Type type);
 };

diff  --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index b537d9e11ef80..e9915e899c2c9 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -1091,9 +1091,12 @@ def hlfir_CopyInOp : hlfir_Op<"copy_in", [MemoryEffects<[MemAlloc]>]> {
     potentially absent variable storage. The second result indicates if a copy
     was made.
 
+    A descriptor address must be provided for the temporary. This descriptor will
+    be set if a temporary copy was made.
+
     This operation is meant to be used in combination with the hlfir.copy_out
-    operation that deletes the temporary if it was created and copies the data
-    back if needed.
+    operation that takes the address of the descriptor for the temporary, deletes
+    the temporary if it was created, and copies the data back if needed.
     This operation allows passing non contiguous arrays to contiguous dummy
     arguments, which is possible in Fortran procedure references.
 
@@ -1103,17 +1106,19 @@ def hlfir_CopyInOp : hlfir_Op<"copy_in", [MemoryEffects<[MemAlloc]>]> {
   }];
 
   let arguments = (ins Arg<fir_BaseBoxType, "", [MemRead]>:$var,
-                   Optional<I1>:$var_is_present);
+                    Arg<AnyReferenceLike, "", [MemWrite]>:$tempBox,
+                    Optional<I1>:$var_is_present);
 
   let results = (outs fir_BaseBoxType, I1);
 
   let assemblyFormat = [{
-    $var (`handle_optional` $var_is_present^)?
+    $var `to` $tempBox (`handle_optional` $var_is_present^)?
     attr-dict `:` functional-type(operands, results)
   }];
 
   let builders = [
-    OpBuilder<(ins "mlir::Value":$var, "mlir::Value":$var_is_present)>
+    OpBuilder<(ins "mlir::Value":$var, "mlir::Value":$temp_box,
+          "mlir::Value":$var_is_present)>
   ];
 
   let extraClassDeclaration = [{
@@ -1138,9 +1143,10 @@ def hlfir_CopyOutOp : hlfir_Op<"copy_out", [MemoryEffects<[MemFree]>]> {
     the temporary.
     The copy back is done if $var is provided and $was_copied is true.
     The deallocation of $temp is done if $was_copied is true.
+    $temp must be the descriptor address that was provided to hlfir.copy_in.
   }];
 
-  let arguments = (ins Arg<fir_BaseBoxType, "", [MemRead]>:$temp,
+  let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$temp,
                        I1:$was_copied,
                        Arg<Optional<fir_BaseBoxType>, "", [MemWrite]>:$var);
 

diff  --git a/flang/include/flang/Runtime/assign.h b/flang/include/flang/Runtime/assign.h
index b19c02f44c73b..a1cc9eaf4355f 100644
--- a/flang/include/flang/Runtime/assign.h
+++ b/flang/include/flang/Runtime/assign.h
@@ -36,8 +36,16 @@ void RTDECL(Assign)(Descriptor &to, const Descriptor &from,
 // reallocation.
 void RTDECL(AssignTemporary)(Descriptor &to, const Descriptor &from,
     const char *sourceFile = nullptr, int sourceLine = 0);
-void RTDECL(CopyOutAssign)(Descriptor &to, const Descriptor &from,
-    bool skipToInit, const char *sourceFile = nullptr, int sourceLine = 0);
+
+// Establish "temp" descriptor as an allocatable descriptor with the same type,
+// rank, and length parameters as "var" and copy "var" to it using
+// AssignTemporary.
+void RTDECL(CopyInAssign)(Descriptor &temp, const Descriptor &var,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+// When "var" is provided, copy "temp" to it assuming "var" is already
+// initialized. Destroy and deallocate "temp" in all cases.
+void RTDECL(CopyOutAssign)(Descriptor *var, Descriptor &temp,
+    const char *sourceFile = nullptr, int sourceLine = 0);
 // This variant is for assignments to explicit-length CHARACTER left-hand
 // sides that might need to handle truncation or blank-fill, and
 // must maintain the character length even if an allocatable array

diff  --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 65a2ffbea5dd1..daa22fee0b409 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -920,9 +920,11 @@ namespace {
 struct CallCleanUp {
   struct CopyIn {
     void genCleanUp(mlir::Location loc, fir::FirOpBuilder &builder) {
-      builder.create<hlfir::CopyOutOp>(loc, copiedIn, wasCopied, copyBackVar);
+      builder.create<hlfir::CopyOutOp>(loc, tempBox, wasCopied, copyBackVar);
     }
-    mlir::Value copiedIn;
+    // address of the descriptor holding the temp if a temp was created.
+    mlir::Value tempBox;
+    // Boolean indicating if a copy was made or not.
     mlir::Value wasCopied;
     // copyBackVar may be null if copy back is not needed.
     mlir::Value copyBackVar;
@@ -945,10 +947,10 @@ struct CallCleanUp {
 /// It holds the value to be passed in the call and any related
 /// clean-ups to be done after the call.
 struct PreparedDummyArgument {
-  void pushCopyInCleanUp(mlir::Value copiedIn, mlir::Value wasCopied,
+  void pushCopyInCleanUp(mlir::Value tempBox, mlir::Value wasCopied,
                          mlir::Value copyBackVar) {
     cleanups.emplace_back(
-        CallCleanUp{CallCleanUp::CopyIn{copiedIn, wasCopied, copyBackVar}});
+        CallCleanUp{CallCleanUp::CopyIn{tempBox, wasCopied, copyBackVar}});
   }
   void pushExprAssociateCleanUp(mlir::Value tempVar, mlir::Value wasCopied) {
     cleanups.emplace_back(
@@ -987,7 +989,6 @@ struct ConditionallyPreparedDummy {
     for (const CallCleanUp &c : preparedDummy.cleanups) {
       if (const auto *copyInCleanUp =
               std::get_if<CallCleanUp::CopyIn>(&c.cleanUp)) {
-        thenResultValues.push_back(copyInCleanUp->copiedIn);
         thenResultValues.push_back(copyInCleanUp->wasCopied);
         if (copyInCleanUp->copyBackVar)
           thenResultValues.push_back(copyInCleanUp->copyBackVar);
@@ -1042,8 +1043,10 @@ struct ConditionallyPreparedDummy {
         mlir::Value copyBackVar;
         if (copyInCleanUp->copyBackVar)
           copyBackVar = ifOp.getResults().back();
-        preparedDummy.pushCopyInCleanUp(ifOp.getResults()[1],
-                                        ifOp.getResults()[2], copyBackVar);
+        // tempBox is an hlfir.copy_in argument created outside of the
+        // fir.if region. It needs not to be threaded as a fir.if result.
+        preparedDummy.pushCopyInCleanUp(copyInCleanUp->tempBox,
+                                        ifOp.getResults()[1], copyBackVar);
       } else {
         preparedDummy.pushExprAssociateCleanUp(ifOp.getResults()[1],
                                                ifOp.getResults()[2]);
@@ -1204,10 +1207,29 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
         dummyTypeWithActualRank, actual.getFortranElementType(),
         actual.isPolymorphic());
 
+  PreparedDummyArgument preparedDummy;
+
+  // Helpers to generate hlfir.copy_in operation and register the related
+  // hlfir.copy_out creation.
+  auto genCopyIn = [&](hlfir::Entity var, bool doCopyOut) -> hlfir::Entity {
+    auto baseBoxTy = mlir::dyn_cast<fir::BaseBoxType>(var.getType());
+    assert(baseBoxTy && "expect non simply contiguous variables to be boxes");
+    // Create allocatable descriptor for the potential temporary.
+    mlir::Type tempBoxType = baseBoxTy.getBoxTypeWithNewAttr(
+        fir::BaseBoxType::Attribute::Allocatable);
+    mlir::Value tempBox = builder.createTemporary(loc, tempBoxType);
+    auto copyIn = builder.create<hlfir::CopyInOp>(
+        loc, var, tempBox, /*var_is_present=*/mlir::Value{});
+    // Register the copy-out after the call.
+    preparedDummy.pushCopyInCleanUp(copyIn.getTempBox(), copyIn.getWasCopied(),
+                                    doCopyOut ? copyIn.getVar()
+                                              : mlir::Value{});
+    return hlfir::Entity{copyIn.getCopiedIn()};
+  };
+
   // Step 2: prepare the storage for the dummy arguments, ensuring that it
   // matches the dummy requirements (e.g., must be contiguous or must be
   // a temporary).
-  PreparedDummyArgument preparedDummy;
   hlfir::Entity entity =
       hlfir::derefPointersAndAllocatables(loc, builder, actual);
   if (entity.isVariable()) {
@@ -1243,8 +1265,6 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
       preparedDummy.pushExprAssociateCleanUp(associate);
     } else if (mustDoCopyInOut) {
       // Copy-in non contiguous variables.
-      assert(mlir::isa<fir::BaseBoxType>(entity.getType()) &&
-             "expect non simply contiguous variables to be boxes");
       if (actualIsAssumedRank)
         TODO(loc, "copy-in and copy-out of assumed-rank arguments");
       // TODO: for non-finalizable monomorphic derived type actual
@@ -1254,13 +1274,7 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
       // allocation for the temp in this case. We can communicate
       // this to the codegen via some CopyInOp flag.
       // This is a performance concern.
-      auto copyIn = builder.create<hlfir::CopyInOp>(
-          loc, entity, /*var_is_present=*/mlir::Value{});
-      entity = hlfir::Entity{copyIn.getCopiedIn()};
-      // Register the copy-out after the call.
-      preparedDummy.pushCopyInCleanUp(
-          copyIn.getCopiedIn(), copyIn.getWasCopied(),
-          arg.mayBeModifiedByCall() ? copyIn.getVar() : mlir::Value{});
+      entity = genCopyIn(entity, arg.mayBeModifiedByCall());
     }
   } else {
     const Fortran::lower::SomeExpr *expr = arg.entity->UnwrapExpr();
@@ -1287,14 +1301,7 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
       entity = hlfir::Entity{builder.create<fir::ReboxOp>(
           loc, boxType, entity, /*shape=*/mlir::Value{},
           /*slice=*/mlir::Value{})};
-      auto copyIn = builder.create<hlfir::CopyInOp>(
-          loc, entity, /*var_is_present=*/mlir::Value{});
-      entity = hlfir::Entity{copyIn.getCopiedIn()};
-      // Note that the copy-out is not required, but the copy-in
-      // temporary must be deallocated if created.
-      preparedDummy.pushCopyInCleanUp(copyIn.getCopiedIn(),
-                                      copyIn.getWasCopied(),
-                                      /*copyBackVar=*/mlir::Value{});
+      entity = genCopyIn(entity, /*doCopyOut=*/false);
     }
   }
 

diff  --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
index 9937e9d159886..44c3dc88edd32 100644
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -2290,11 +2290,21 @@ class ScalarExprLowering {
     bool isActualArgBox =
         fir::isa_box_type(fir::getBase(copyOutPair.var).getType());
     auto doCopyOut = [&]() {
-      if (!copyOutPair.argMayBeModifiedByCall) {
-        return;
-      }
       if (!isActualArgBox || inlineCopyInOutForBoxes) {
-        genArrayCopy(copyOutPair.var, copyOutPair.temp);
+        if (copyOutPair.argMayBeModifiedByCall)
+          genArrayCopy(copyOutPair.var, copyOutPair.temp);
+        if (mlir::isa<fir::RecordType>(
+                fir::getElementTypeOf(copyOutPair.temp))) {
+          // Destroy components of the temporary (if any).
+          // If there are no components requiring destruction, then the call
+          // is a no-op.
+          mlir::Value tempBox =
+              fir::getBase(builder.createBox(loc, copyOutPair.temp));
+          fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
+                                                                 tempBox);
+        }
+        // Deallocate the top-level entity of the temporary.
+        builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
         return;
       }
       // Generate CopyOutAssign() call to copy data from the temporary
@@ -2305,51 +2315,39 @@ class ScalarExprLowering {
       // Moreover, CopyOutAssign() guarantees that there will be no
       // finalization for the LHS even if it is of a derived type
       // with finalization.
+
+      // Create allocatable descriptor for the temp so that the runtime may
+      // deallocate it.
       mlir::Value srcBox =
           fir::getBase(builder.createBox(loc, copyOutPair.temp));
-      mlir::Value destBox =
-          fir::getBase(builder.createBox(loc, copyOutPair.var));
-      mlir::Value destBoxRef = builder.createTemporary(loc, destBox.getType());
-      builder.create<fir::StoreOp>(loc, destBox, destBoxRef);
-      fir::runtime::genCopyOutAssign(builder, loc, destBoxRef, srcBox,
-                                     /*skipToInit=*/true);
-    };
-    if (!copyOutPair.restrictCopyAndFreeAtRuntime) {
-      doCopyOut();
-
-      if (mlir::isa<fir::RecordType>(fir::getElementTypeOf(copyOutPair.temp))) {
-        // Destroy components of the temporary (if any).
-        // If there are no components requiring destruction, then the call
-        // is a no-op.
-        mlir::Value tempBox =
-            fir::getBase(builder.createBox(loc, copyOutPair.temp));
-        fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
-                                                               tempBox);
+      mlir::Type allocBoxTy =
+          mlir::cast<fir::BaseBoxType>(srcBox.getType())
+              .getBoxTypeWithNewAttr(fir::BaseBoxType::Attribute::Allocatable);
+      srcBox = builder.create<fir::ReboxOp>(loc, allocBoxTy, srcBox,
+                                            /*shift=*/mlir::Value{},
+                                            /*slice=*/mlir::Value{});
+      mlir::Value srcBoxRef = builder.createTemporary(loc, srcBox.getType());
+      builder.create<fir::StoreOp>(loc, srcBox, srcBoxRef);
+      // Create descriptor pointer to variable descriptor if copy out is needed,
+      // and nullptr otherwise.
+      mlir::Value destBoxRef;
+      if (copyOutPair.argMayBeModifiedByCall) {
+        mlir::Value destBox =
+            fir::getBase(builder.createBox(loc, copyOutPair.var));
+        destBoxRef = builder.createTemporary(loc, destBox.getType());
+        builder.create<fir::StoreOp>(loc, destBox, destBoxRef);
+      } else {
+        destBoxRef = builder.create<fir::ZeroOp>(loc, srcBoxRef.getType());
       }
+      fir::runtime::genCopyOutAssign(builder, loc, destBoxRef, srcBoxRef);
+    };
 
-      // Deallocate the top-level entity of the temporary.
-      builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
-      return;
-    }
-
-    builder.genIfThen(loc, *copyOutPair.restrictCopyAndFreeAtRuntime)
-        .genThen([&]() {
-          doCopyOut();
-          if (mlir::isa<fir::RecordType>(
-                  fir::getElementTypeOf(copyOutPair.temp))) {
-            // Destroy components of the temporary (if any).
-            // If there are no components requiring destruction, then the call
-            // is a no-op.
-            mlir::Value tempBox =
-                fir::getBase(builder.createBox(loc, copyOutPair.temp));
-            fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
-                                                                   tempBox);
-          }
-
-          // Deallocate the top-level entity of the temporary.
-          builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
-        })
-        .end();
+    if (!copyOutPair.restrictCopyAndFreeAtRuntime)
+      doCopyOut();
+    else
+      builder.genIfThen(loc, *copyOutPair.restrictCopyAndFreeAtRuntime)
+          .genThen([&]() { doCopyOut(); })
+          .end();
   }
 
   /// Lower a designator to a variable that may be absent at runtime into an

diff  --git a/flang/lib/Optimizer/Builder/Runtime/Assign.cpp b/flang/lib/Optimizer/Builder/Runtime/Assign.cpp
index ad0c2af85cdf3..62f03f7d48665 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Assign.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Assign.cpp
@@ -69,19 +69,29 @@ void fir::runtime::genAssignTemporary(fir::FirOpBuilder &builder,
   builder.create<fir::CallOp>(loc, func, args);
 }
 
+void fir::runtime::genCopyInAssign(fir::FirOpBuilder &builder,
+                                   mlir::Location loc, mlir::Value destBox,
+                                   mlir::Value sourceBox) {
+  auto func = fir::runtime::getRuntimeFunc<mkRTKey(CopyInAssign)>(loc, builder);
+  auto fTy = func.getFunctionType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, destBox,
+                                            sourceBox, sourceFile, sourceLine);
+  builder.create<fir::CallOp>(loc, func, args);
+}
+
 void fir::runtime::genCopyOutAssign(fir::FirOpBuilder &builder,
                                     mlir::Location loc, mlir::Value destBox,
-                                    mlir::Value sourceBox, bool skipToInit) {
+                                    mlir::Value sourceBox) {
   auto func =
       fir::runtime::getRuntimeFunc<mkRTKey(CopyOutAssign)>(loc, builder);
   auto fTy = func.getFunctionType();
   auto sourceFile = fir::factory::locationToFilename(builder, loc);
   auto sourceLine =
-      fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
-  auto i1Ty = builder.getIntegerType(1);
-  auto skipToInitVal = builder.createIntegerConstant(loc, i1Ty, skipToInit);
-  auto args =
-      fir::runtime::createArguments(builder, loc, fTy, destBox, sourceBox,
-                                    skipToInitVal, sourceFile, sourceLine);
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, destBox,
+                                            sourceBox, sourceFile, sourceLine);
   builder.create<fir::CallOp>(loc, func, args);
 }

diff  --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index b6adb31213cd1..b3f2ec848a2d0 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -1335,6 +1335,26 @@ fir::BaseBoxType fir::BaseBoxType::getBoxTypeWithNewShape(int rank) const {
   return mlir::cast<fir::BaseBoxType>(changeTypeShape(*this, newShape));
 }
 
+fir::BaseBoxType fir::BaseBoxType::getBoxTypeWithNewAttr(
+    fir::BaseBoxType::Attribute attr) const {
+  mlir::Type baseType = fir::unwrapRefType(getEleTy());
+  switch (attr) {
+  case fir::BaseBoxType::Attribute::None:
+    break;
+  case fir::BaseBoxType::Attribute::Allocatable:
+    baseType = fir::HeapType::get(baseType);
+    break;
+  case fir::BaseBoxType::Attribute::Pointer:
+    baseType = fir::PointerType::get(baseType);
+    break;
+  }
+  return llvm::TypeSwitch<fir::BaseBoxType, fir::BaseBoxType>(*this)
+      .Case<fir::BoxType>(
+          [baseType](auto) { return fir::BoxType::get(baseType); })
+      .Case<fir::ClassType>(
+          [baseType](auto) { return fir::ClassType::get(baseType); });
+}
+
 bool fir::BaseBoxType::isAssumedRank() const {
   if (auto seqTy =
           mlir::dyn_cast<fir::SequenceType>(fir::unwrapRefType(getEleTy())))

diff  --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index 218b38e9ba79d..bf0acc21d24b1 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -1495,9 +1495,9 @@ mlir::LogicalResult hlfir::DestroyOp::verify() {
 
 void hlfir::CopyInOp::build(mlir::OpBuilder &builder,
                             mlir::OperationState &odsState, mlir::Value var,
-                            mlir::Value var_is_present) {
+                            mlir::Value tempBox, mlir::Value var_is_present) {
   return build(builder, odsState, {var.getType(), builder.getI1Type()}, var,
-               var_is_present);
+               tempBox, var_is_present);
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
index b48b993ddc5af..74bbab0d72e9f 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
@@ -31,28 +31,6 @@ namespace hlfir {
 
 using namespace mlir;
 
-static mlir::Value genAllocatableTempFromSourceBox(mlir::Location loc,
-                                                   fir::FirOpBuilder &builder,
-                                                   mlir::Value sourceBox) {
-  assert(mlir::isa<fir::BaseBoxType>(sourceBox.getType()) &&
-         "must be a base box type");
-  // Use the runtime to make a quick and dirty temp with the rhs value.
-  // Overkill for scalar rhs that could be done in much more clever ways.
-  // Note that temp descriptor must have the allocatable flag set so that
-  // the runtime will allocate it with the shape and type parameters of
-  // the RHS.
-  // This has the huge benefit of dealing with all cases, including
-  // polymorphic entities.
-  mlir::Type fromHeapType = fir::HeapType::get(fir::unwrapRefType(
-      mlir::cast<fir::BaseBoxType>(sourceBox.getType()).getEleTy()));
-  mlir::Type fromBoxHeapType = fir::BoxType::get(fromHeapType);
-  mlir::Value fromMutableBox =
-      fir::factory::genNullBoxStorage(builder, loc, fromBoxHeapType);
-  fir::runtime::genAssignTemporary(builder, loc, fromMutableBox, sourceBox);
-  mlir::Value copy = builder.create<fir::LoadOp>(loc, fromMutableBox);
-  return copy;
-}
-
 namespace {
 /// May \p lhs alias with \p rhs?
 /// TODO: implement HLFIR alias analysis.
@@ -211,13 +189,19 @@ class CopyInOpConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
               // check (for IsContiguous) the copy loops can hardly provide any
               // value to optimizations, instead, the optimizer just wastes
               // compilation time on these loops.
-              mlir::Value temp =
-                  genAllocatableTempFromSourceBox(loc, builder, inputVariable);
+              mlir::Value temp = copyInOp.getTempBox();
+              fir::runtime::genCopyInAssign(builder, loc, temp, inputVariable);
+              mlir::Value copy = builder.create<fir::LoadOp>(loc, temp);
               // Get rid of allocatable flag in the fir.box.
-              temp = builder.create<fir::ReboxOp>(loc, resultAddrType, temp,
-                                                  /*shape=*/mlir::Value{},
-                                                  /*slice=*/mlir::Value{});
-              builder.create<fir::ResultOp>(loc, temp);
+              if (mlir::cast<fir::BaseBoxType>(resultAddrType).isAssumedRank())
+                copy = builder.create<fir::ReboxAssumedRankOp>(
+                    loc, resultAddrType, copy,
+                    fir::LowerBoundModifierAttribute::Preserve);
+              else
+                copy = builder.create<fir::ReboxOp>(loc, resultAddrType, copy,
+                                                    /*shape=*/mlir::Value{},
+                                                    /*slice=*/mlir::Value{});
+              builder.create<fir::ResultOp>(loc, copy);
             })
             .getResults()[0];
     return {addr, builder.genNot(loc, isContiguous)};
@@ -274,34 +258,26 @@ class CopyOutOpConversion : public mlir::OpRewritePattern<hlfir::CopyOutOp> {
     builder.genIfThen(loc, copyOutOp.getWasCopied())
         .genThen([&]() {
           mlir::Value temp = copyOutOp.getTemp();
+          mlir::Value varMutableBox;
+          // Generate CopyOutAssign runtime call.
           if (mlir::Value var = copyOutOp.getVar()) {
-            auto mutableBoxTo = builder.createTemporary(loc, var.getType());
-            builder.create<fir::StoreOp>(loc, var, mutableBoxTo);
-            // Generate CopyOutAssign() call to copy data from the temporary
-            // to the actualArg. Note that in case the actual argument
-            // is ALLOCATABLE/POINTER the CopyOutAssign() implementation
-            // should not engage its reallocation, because the temporary
-            // is rank, shape and type compatible with it.
-            // Moreover, CopyOutAssign() guarantees that there will be no
-            // finalization for the LHS even if it is of a derived type
-            // with finalization.
-            fir::runtime::genCopyOutAssign(builder, loc, mutableBoxTo, temp,
-                                           /*skipToInit=*/true);
+            // Set the variable descriptor pointer in order to copy data from
+            // the temporary to the actualArg. Note that in case the actual
+            // argument is ALLOCATABLE/POINTER the CopyOutAssign()
+            // implementation should not engage its reallocation, because the
+            // temporary is rank, shape and type compatible with it. Moreover,
+            // CopyOutAssign() guarantees that there will be no finalization for
+            // the LHS even if it is of a derived type with finalization.
+            varMutableBox = builder.createTemporary(loc, var.getType());
+            builder.create<fir::StoreOp>(loc, var, varMutableBox);
+          } else {
+            // Even when there is no need to copy back the data (e.g., the dummy
+            // argument was intent(in), CopyOutAssign is called to
+            // destroy/deallocate the temporary.
+            varMutableBox = builder.create<fir::ZeroOp>(loc, temp.getType());
           }
-          // Destroy components of the temporary (if any).
-          fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
-                                                                 temp);
-          mlir::Type heapType =
-              fir::HeapType::get(fir::dyn_cast_ptrOrBoxEleTy(temp.getType()));
-          mlir::Value tempAddr =
-              builder.create<fir::BoxAddrOp>(loc, heapType, temp);
-
-          // Deallocate the top-level entity of the temporary.
-          //
-          // Note that this FreeMemOp is coupled with the runtime
-          // allocation engaged by the code generated by
-          // genAllocatableTempFromSourceBox().
-          builder.create<fir::FreeMemOp>(loc, tempAddr);
+          fir::runtime::genCopyOutAssign(builder, loc, varMutableBox,
+                                         copyOutOp.getTemp());
         })
         .end();
     rewriter.eraseOp(copyOutOp);

diff  --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp
index 25d2ba4501c11..c3c9b0ba10ab3 100644
--- a/flang/runtime/assign.cpp
+++ b/flang/runtime/assign.cpp
@@ -594,26 +594,24 @@ void RTDEF(AssignTemporary)(Descriptor &to, const Descriptor &from,
   Assign(to, from, terminator, PolymorphicLHS);
 }
 
-void RTDEF(CopyOutAssign)(Descriptor &to, const Descriptor &from,
-    bool skipToInit, const char *sourceFile, int sourceLine) {
+void RTDEF(CopyInAssign)(Descriptor &temp, const Descriptor &var,
+    const char *sourceFile, int sourceLine) {
+  Terminator terminator{sourceFile, sourceLine};
+  temp = var;
+  temp.set_base_addr(nullptr);
+  temp.raw().attribute = CFI_attribute_allocatable;
+  RTNAME(AssignTemporary)(temp, var, sourceFile, sourceLine);
+}
+
+void RTDEF(CopyOutAssign)(
+    Descriptor *var, Descriptor &temp, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
-  // Initialize the "to" if it is of derived type that needs initialization.
-  if (!skipToInit) {
-    if (const DescriptorAddendum * addendum{to.Addendum()}) {
-      if (const auto *derived{addendum->derivedType()}) {
-        if (!derived->noInitializationNeeded()) {
-          if (ReturnError(terminator, Initialize(to, *derived, terminator)) !=
-              StatOk) {
-            return;
-          }
-        }
-      }
-    }
-  }
 
   // Copyout from the temporary must not cause any finalizations
-  // for LHS.
-  Assign(to, from, terminator, NoAssignFlags);
+  // for LHS. The variable must be properly initialized already.
+  if (var)
+    Assign(*var, temp, terminator, NoAssignFlags);
+  temp.Destroy(/*finalize=*/false, /*destroyPointers=*/false, &terminator);
 }
 
 void RTDEF(AssignExplicitLengthCharacter)(Descriptor &to,

diff  --git a/flang/test/HLFIR/assumed-type-actual-args.f90 b/flang/test/HLFIR/assumed-type-actual-args.f90
index 7ce1067d7acd7..855542709f622 100644
--- a/flang/test/HLFIR/assumed-type-actual-args.f90
+++ b/flang/test/HLFIR/assumed-type-actual-args.f90
@@ -132,10 +132,10 @@ subroutine s5b(x)
 ! CHECK-SAME:                        %[[VAL_0:.*]]: !fir.box<!fir.array<?xnone>> {fir.bindc_name = "x"}) {
 ! CHECK:           %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtest4Ex"} : (!fir.box<!fir.array<?xnone>>, !fir.dscope) -> (!fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>)
-! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> (!fir.box<!fir.array<?xnone>>, i1)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xnone>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>) -> (!fir.box<!fir.array<?xnone>>, i1)
 ! CHECK:           %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box<!fir.array<?xnone>>) -> !fir.ref<!fir.array<?xnone>>
 ! CHECK:           fir.call @_QPs4(%[[VAL_3]]) fastmath<contract> : (!fir.ref<!fir.array<?xnone>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_2]]#0, %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
 ! CHECK:           return
 ! CHECK:         }
 
@@ -144,18 +144,17 @@ subroutine s5b(x)
 ! CHECK:           %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[DSCOPE]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest3bEx"} : (!fir.box<!fir.array<?xnone>>, !fir.dscope) -> (!fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>)
 ! CHECK:           %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> i1
-! CHECK:           %[[VAL_3:.*]]:4 = fir.if %[[VAL_2]] -> (!fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) {
-! CHECK:             %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> (!fir.box<!fir.array<?xnone>>, i1)
-! CHECK:             fir.result %[[VAL_4]]#0, %[[VAL_4]]#0, %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
+! CHECK:           %[[VAL_3:.*]]:3 = fir.if %[[VAL_2]] -> (!fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) {
+! CHECK:             %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xnone>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>) -> (!fir.box<!fir.array<?xnone>>, i1)
+! CHECK:             fir.result %[[VAL_4]]#0, %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
 ! CHECK:           } else {
-! CHECK:             %[[VAL_5:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
 ! CHECK:             %[[VAL_6:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
 ! CHECK:             %[[VAL_7:.*]] = arith.constant false
 ! CHECK:             %[[VAL_8:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
-! CHECK:             fir.result %[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]] : !fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
+! CHECK:             fir.result %[[VAL_6]], %[[VAL_7]], %[[VAL_8]] : !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
 ! CHECK:           }
 ! CHECK:           fir.call @_QPs3b(%[[VAL_9:.*]]#0) fastmath<contract> : (!fir.box<!fir.array<?xnone>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_9]]#1, %[[VAL_9]]#2 to %[[VAL_9]]#3 : (!fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_9]]#1 to %[[VAL_9]]#2 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
 ! CHECK:           return
 ! CHECK:         }
 
@@ -164,19 +163,18 @@ subroutine s5b(x)
 ! CHECK:           %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[DSCOPE]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest4bEx"} : (!fir.box<!fir.array<?xnone>>, !fir.dscope) -> (!fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>)
 ! CHECK:           %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> i1
-! CHECK:           %[[VAL_3:.*]]:4 = fir.if %[[VAL_2]] -> (!fir.ref<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) {
-! CHECK:             %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> (!fir.box<!fir.array<?xnone>>, i1)
+! CHECK:           %[[VAL_3:.*]]:3 = fir.if %[[VAL_2]] -> (!fir.ref<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) {
+! CHECK:             %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xnone>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>) -> (!fir.box<!fir.array<?xnone>>, i1)
 ! CHECK:             %[[VAL_5:.*]] = fir.box_addr %[[VAL_4]]#0 : (!fir.box<!fir.array<?xnone>>) -> !fir.ref<!fir.array<?xnone>>
-! CHECK:             fir.result %[[VAL_5]], %[[VAL_4]]#0, %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.ref<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
+! CHECK:             fir.result %[[VAL_5]], %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.ref<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
 ! CHECK:           } else {
-! CHECK:             %[[VAL_6:.*]] = fir.absent !fir.ref<!fir.array<?xnone>>
-! CHECK:             %[[VAL_7:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
+! CHECK:             %[[VAL_7:.*]] = fir.absent !fir.ref<!fir.array<?xnone>>
 ! CHECK:             %[[VAL_8:.*]] = arith.constant false
 ! CHECK:             %[[VAL_9:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
-! CHECK:             fir.result %[[VAL_6]], %[[VAL_7]], %[[VAL_8]], %[[VAL_9]] : !fir.ref<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
+! CHECK:             fir.result %[[VAL_7]], %[[VAL_8]], %[[VAL_9]] : !fir.ref<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
 ! CHECK:           }
 ! CHECK:           fir.call @_QPs4b(%[[VAL_10:.*]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xnone>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_10]]#1, %[[VAL_10]]#2 to %[[VAL_10]]#3 : (!fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_10]]#1 to %[[VAL_10]]#2 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
 ! CHECK:           return
 ! CHECK:         }
 
@@ -219,18 +217,17 @@ subroutine s5b(x)
 ! CHECK:           %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[DSCOPE]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest5bEx"} : (!fir.box<!fir.array<?xnone>>, !fir.dscope) -> (!fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>)
 ! CHECK:           %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> i1
-! CHECK:           %[[VAL_3:.*]]:4 = fir.if %[[VAL_2]] -> (!fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) {
-! CHECK:             %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?xnone>>) -> (!fir.box<!fir.array<?xnone>>, i1)
-! CHECK:             fir.result %[[VAL_4]]#0, %[[VAL_4]]#0, %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
+! CHECK:           %[[VAL_3:.*]]:3 = fir.if %[[VAL_2]] -> (!fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) {
+! CHECK:             %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xnone>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>) -> (!fir.box<!fir.array<?xnone>>, i1)
+! CHECK:             fir.result %[[VAL_4]]#0, %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
 ! CHECK:           } else {
-! CHECK:             %[[VAL_5:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
 ! CHECK:             %[[VAL_6:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
 ! CHECK:             %[[VAL_7:.*]] = arith.constant false
 ! CHECK:             %[[VAL_8:.*]] = fir.absent !fir.box<!fir.array<?xnone>>
-! CHECK:             fir.result %[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]] : !fir.box<!fir.array<?xnone>>, !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
+! CHECK:             fir.result %[[VAL_6]], %[[VAL_7]], %[[VAL_8]] : !fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>
 ! CHECK:           }
 ! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_10:.*]]#0 : (!fir.box<!fir.array<?xnone>>) -> !fir.box<!fir.array<*:none>>
 ! CHECK:           fir.call @_QPs5b(%[[VAL_9]]) fastmath<contract> : (!fir.box<!fir.array<*:none>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_10]]#1, %[[VAL_10]]#2 to %[[VAL_10]]#3 : (!fir.box<!fir.array<?xnone>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_10]]#1 to %[[VAL_10]]#2 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xnone>>>>, i1, !fir.box<!fir.array<?xnone>>) -> ()
 ! CHECK:           return
 ! CHECK:         }

diff  --git a/flang/test/HLFIR/assumed_shape_with_value_keyword.f90 b/flang/test/HLFIR/assumed_shape_with_value_keyword.f90
index da3dff16382c8..197efc08422c6 100644
--- a/flang/test/HLFIR/assumed_shape_with_value_keyword.f90
+++ b/flang/test/HLFIR/assumed_shape_with_value_keyword.f90
@@ -10,10 +10,10 @@ subroutine test_integer_value1(x)
 ! CHECK-LABEL:  func.func @_QPtest_integer_value1(
 ! CHECK-SAME:     %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "x"}) {
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QFtest_integer_value1Ex"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, i1)
+! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.box<!fir.array<?xi32>>, i1)
 ! CHECK:          %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]]#0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 ! CHECK:          fir.call @_QPinternal_call1(%[[VAL_2]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
-! CHECK:          hlfir.copy_out %[[VAL_1]]#0, %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?xi32>>, i1, !fir.box<!fir.array<?xi32>>) -> ()
+! CHECK:          hlfir.copy_out %[[TMP_BOX]], %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i1, !fir.box<!fir.array<?xi32>>) -> ()
 ! CHECK:          return
 ! CHECK:        }
 
@@ -24,10 +24,10 @@ subroutine test_integer_value2(x)
 ! CHECK-LABEL:  func.func @_QPtest_integer_value2(
 ! CHECK-SAME:     %[[ARG0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "x"}) {
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QFtest_integer_value2Ex"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
-! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, i1)
+! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.box<!fir.array<?x?xi32>>, i1)
 ! CHECK:          %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.ref<!fir.array<?x?xi32>>
 ! CHECK:          fir.call @_QPinternal_call2(%[[VAL_2]]) fastmath<contract> : (!fir.ref<!fir.array<?x?xi32>>) -> ()
-! CHECK:          hlfir.copy_out %[[VAL_1]]#0, %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?xi32>>, i1, !fir.box<!fir.array<?x?xi32>>) -> ()
+! CHECK:          hlfir.copy_out %[[TMP_BOX]], %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, i1, !fir.box<!fir.array<?x?xi32>>) -> ()
 ! CHECK:          return
 ! CHECK:        }
 
@@ -38,10 +38,10 @@ subroutine test_real_value1(x)
 ! CHECK-LABEL:  func.func @_QPtest_real_value1(
 ! CHECK-SAME:     %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QFtest_real_value1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, i1)
+! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.box<!fir.array<?xf32>>, i1)
 ! CHECK:          %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]]#0 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
 ! CHECK:          fir.call @_QPinternal_call3(%[[VAL_2]]) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>) -> ()
-! CHECK:          hlfir.copy_out %[[VAL_1]]#0, %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
+! CHECK:          hlfir.copy_out %[[TMP_BOX]], %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:          return
 ! CHECK:        }
 
@@ -52,10 +52,10 @@ subroutine test_real_value2(x)
 ! CHECK-LABEL:  func.func @_QPtest_real_value2(
 ! CHECK-SAME:     %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"}) {
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QFtest_real_value2Ex"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
-! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, i1)
+! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> (!fir.box<!fir.array<?x?xf32>>, i1)
 ! CHECK:          %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
 ! CHECK:          fir.call @_QPinternal_call4(%[[VAL_2]]) fastmath<contract> : (!fir.ref<!fir.array<?x?xf32>>) -> ()
-! CHECK:          hlfir.copy_out %[[VAL_1]]#0, %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?xf32>>, i1, !fir.box<!fir.array<?x?xf32>>) -> ()
+! CHECK:          hlfir.copy_out %[[TMP_BOX]], %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>, i1, !fir.box<!fir.array<?x?xf32>>) -> ()
 ! CHECK:          return
 ! CHECK:        }
 
@@ -66,10 +66,10 @@ subroutine test_complex_value1(x)
 ! CHECK-LABEL:  func.func @_QPtest_complex_value1(
 ! CHECK-SAME:     %[[ARG0:.*]]: !fir.box<!fir.array<?x!fir.complex<4>>> {fir.bindc_name = "x"}) {
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QFtest_complex_value1Ex"} : (!fir.box<!fir.array<?x!fir.complex<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.complex<4>>>, !fir.box<!fir.array<?x!fir.complex<4>>>)
-! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?x!fir.complex<4>>>) -> (!fir.box<!fir.array<?x!fir.complex<4>>>, i1)
+! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x!fir.complex<4>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.complex<4>>>>>) -> (!fir.box<!fir.array<?x!fir.complex<4>>>, i1)
 ! CHECK:          %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]]#0 : (!fir.box<!fir.array<?x!fir.complex<4>>>) -> !fir.ref<!fir.array<?x!fir.complex<4>>>
 ! CHECK:          fir.call @_QPinternal_call5(%[[VAL_2]]) fastmath<contract> : (!fir.ref<!fir.array<?x!fir.complex<4>>>) -> ()
-! CHECK:          hlfir.copy_out %[[VAL_1]]#0, %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?x!fir.complex<4>>>, i1, !fir.box<!fir.array<?x!fir.complex<4>>>) -> ()
+! CHECK:          hlfir.copy_out %[[TMP_BOX]], %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.complex<4>>>>>, i1, !fir.box<!fir.array<?x!fir.complex<4>>>) -> ()
 ! CHECK:          return
 ! CHECK:        }
 
@@ -80,10 +80,10 @@ subroutine test_complex_value2(x)
 ! CHECK-LABEL:  func.func @_QPtest_complex_value2(
 ! CHECK-SAME:     %[[ARG0:.*]]: !fir.box<!fir.array<?x?x!fir.complex<4>>> {fir.bindc_name = "x"}) {
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QFtest_complex_value2Ex"} : (!fir.box<!fir.array<?x?x!fir.complex<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.complex<4>>>, !fir.box<!fir.array<?x?x!fir.complex<4>>>)
-! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?x!fir.complex<4>>>) -> (!fir.box<!fir.array<?x?x!fir.complex<4>>>, i1)
+! CHECK:          %[[VAL_1:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x?x!fir.complex<4>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.complex<4>>>>>) -> (!fir.box<!fir.array<?x?x!fir.complex<4>>>, i1)
 ! CHECK:          %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?x!fir.complex<4>>>) -> !fir.ref<!fir.array<?x?x!fir.complex<4>>>
 ! CHECK:          fir.call @_QPinternal_call6(%[[VAL_2]]) fastmath<contract> : (!fir.ref<!fir.array<?x?x!fir.complex<4>>>) -> ()
-! CHECK:          hlfir.copy_out %[[VAL_1]]#0, %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?x!fir.complex<4>>>, i1, !fir.box<!fir.array<?x?x!fir.complex<4>>>) -> ()
+! CHECK:          hlfir.copy_out %[[TMP_BOX]], %[[VAL_1]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.complex<4>>>>>, i1, !fir.box<!fir.array<?x?x!fir.complex<4>>>) -> ()
 ! CHECK:          return
 ! CHECK:        }
 
@@ -98,10 +98,10 @@ subroutine test_optional1(x)
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<optional, value>, uniq_name = "_QFtest_optional1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
 ! CHECK:          %[[VAL_1:.*]] = fir.is_present %[[VAL_0]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
 ! CHECK:          fir.if %[[VAL_1:.*]] {
-! CHECK:            %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, i1)
+! CHECK:            %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.box<!fir.array<?xf32>>, i1)
 ! CHECK:            %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
 ! CHECK:            fir.call @_QPinternal_call7(%[[VAL_3]]) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>) -> ()
-! CHECK:            hlfir.copy_out %[[VAL_2]]#0, %[[VAL_2]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
+! CHECK:            hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:          } else {
 ! CHECK:          }
 ! CHECK:          return
@@ -118,10 +118,10 @@ subroutine test_optional2(x)
 ! CHECK:          %[[VAL_0:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<optional, value>, uniq_name = "_QFtest_optional2Ex"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
 ! CHECK:          %[[VAL_1:.*]] = fir.is_present %[[VAL_0]]#1 : (!fir.box<!fir.array<?x?xf32>>) -> i1
 ! CHECK:          fir.if %[[VAL_1:.*]] {
-! CHECK:            %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, i1)
+! CHECK:            %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_0]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> (!fir.box<!fir.array<?x?xf32>>, i1)
 ! CHECK:            %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
 ! CHECK:            fir.call @_QPinternal_call8(%[[VAL_3]]) fastmath<contract> : (!fir.ref<!fir.array<?x?xf32>>) -> ()
-! CHECK:            hlfir.copy_out %[[VAL_2]]#0, %[[VAL_2]]#1 to %[[VAL_0]]#0 : (!fir.box<!fir.array<?x?xf32>>, i1, !fir.box<!fir.array<?x?xf32>>) -> ()
+! CHECK:            hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_0]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>, i1, !fir.box<!fir.array<?x?xf32>>) -> ()
 ! CHECK:          } else {
 ! CHECK:          }
 ! CHECK:          return

diff  --git a/flang/test/HLFIR/copy-in-out-codegen.fir b/flang/test/HLFIR/copy-in-out-codegen.fir
index c766c2e9fdc4a..8031536550bdf 100644
--- a/flang/test/HLFIR/copy-in-out-codegen.fir
+++ b/flang/test/HLFIR/copy-in-out-codegen.fir
@@ -2,133 +2,115 @@
 
 // RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s
 
-func.func @test_copy_in(%box: !fir.box<!fir.array<?xf64>>) {
-  %0:2 = hlfir.copy_in %box : (!fir.box<!fir.array<?xf64>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+func.func @test_copy_in(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+  %0:2 = hlfir.copy_in %box to %temp : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
   return
 }
 // CHECK-LABEL:   func.func @test_copy_in(
-// CHECK-SAME:    %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>) {
-// CHECK:    %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
-// CHECK:    %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK:    %[[VAL_3:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_2]]) : (!fir.box<none>) -> i1
-// CHECK:    %[[VAL_4:.*]] = fir.if %[[VAL_3]] -> (!fir.box<!fir.array<?xf64>>) {
-// CHECK:      fir.result %[[VAL_0]] : !fir.box<!fir.array<?xf64>>
-// CHECK:    } else {
-// CHECK:      %[[VAL_5:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf64>>
-// CHECK:      %[[VAL_6:.*]] = arith.constant 0 : index
-// CHECK:      %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
-// CHECK:      %[[VAL_8:.*]] = fir.embox %[[VAL_5]](%[[VAL_7]]) : (!fir.heap<!fir.array<?xf64>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf64>>>
-// CHECK:      fir.store %[[VAL_8]] to %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
-// CHECK:      %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK:      %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK:      %[[VAL_15:.*]] = fir.call @_FortranAAssignTemporary(%[[VAL_12]], %[[VAL_13]],
-// CHECK:      %[[VAL_16:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
-// CHECK:      %[[VAL_17:.*]] = fir.rebox %[[VAL_16]] : (!fir.box<!fir.heap<!fir.array<?xf64>>>) -> !fir.box<!fir.array<?xf64>>
-// CHECK:      fir.result %[[VAL_17]] : !fir.box<!fir.array<?xf64>>
-// CHECK:    }
-// CHECK:    %[[VAL_18:.*]] = arith.constant false
-// CHECK:    %[[VAL_19:.*]] = arith.cmpi eq, %[[VAL_3]], %[[VAL_18]] : i1
-// CHECK:    return
-// CHECK:  }
+// CHECK-SAME:                            %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                            %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+// CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+// CHECK:           %[[VAL_3:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_2]]) : (!fir.box<none>) -> i1
+// CHECK:           %[[VAL_4:.*]] = fir.if %[[VAL_3]] -> (!fir.box<!fir.array<?xf64>>) {
+// CHECK:             fir.result %[[VAL_0]] : !fir.box<!fir.array<?xf64>>
+// CHECK:           } else {
+// CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:             %[[VAL_9:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+// CHECK:             %[[VAL_11:.*]] = fir.call @_FortranACopyInAssign(%[[VAL_8]], %[[VAL_9]],
+// CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK:             %[[VAL_13:.*]] = fir.rebox %[[VAL_12]] : (!fir.box<!fir.heap<!fir.array<?xf64>>>) -> !fir.box<!fir.array<?xf64>>
+// CHECK:             fir.result %[[VAL_13]] : !fir.box<!fir.array<?xf64>>
+// CHECK:           }
+// CHECK:           %[[VAL_14:.*]] = arith.constant false
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_3]], %[[VAL_14]] : i1
+// CHECK:           return
+// CHECK:         }
 
-func.func @test_copy_in_optional(%box: !fir.box<!fir.array<?xf64>>, %is_present: i1) {
-  %0:2 = hlfir.copy_in %box handle_optional %is_present : (!fir.box<!fir.array<?xf64>>, i1) -> (!fir.box<!fir.array<?xf64>>, i1)
+func.func @test_copy_in_optional(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %is_present: i1) {
+  %0:2 = hlfir.copy_in %box to %temp handle_optional %is_present : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> (!fir.box<!fir.array<?xf64>>, i1)
   return
 }
 // CHECK-LABEL:   func.func @test_copy_in_optional(
-// CHECK-SAME:    %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>,
-// CHECK-SAME:    %[[VAL_1:.*]]: i1) {
-// CHECK:    %[[VAL_2:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
-// CHECK:    %[[VAL_3:.*]]:2 = fir.if %[[VAL_1]] -> (!fir.box<!fir.array<?xf64>>, i1) {
-// CHECK:      %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK:      %[[VAL_5:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_4]]) : (!fir.box<none>) -> i1
-// CHECK:      %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.box<!fir.array<?xf64>>) {
-// CHECK:        fir.result %[[VAL_0]] : !fir.box<!fir.array<?xf64>>
-// CHECK:      } else {
-// CHECK:        %[[VAL_7:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf64>>
-// CHECK:        %[[VAL_8:.*]] = arith.constant 0 : index
-// CHECK:        %[[VAL_9:.*]] = fir.shape %[[VAL_8]] : (index) -> !fir.shape<1>
-// CHECK:        %[[VAL_10:.*]] = fir.embox %[[VAL_7]](%[[VAL_9]]) : (!fir.heap<!fir.array<?xf64>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf64>>>
-// CHECK:        fir.store %[[VAL_10]] to %[[VAL_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
-// CHECK:        %[[VAL_14:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK:        %[[VAL_15:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK:        %[[VAL_17:.*]] = fir.call @_FortranAAssignTemporary(%[[VAL_14]], %[[VAL_15]],
-// CHECK:        %[[VAL_18:.*]] = fir.load %[[VAL_2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
-// CHECK:        %[[VAL_19:.*]] = fir.rebox %[[VAL_18]] : (!fir.box<!fir.heap<!fir.array<?xf64>>>) -> !fir.box<!fir.array<?xf64>>
-// CHECK:        fir.result %[[VAL_19]] : !fir.box<!fir.array<?xf64>>
-// CHECK:      }
-// CHECK:      %[[VAL_20:.*]] = arith.constant false
-// CHECK:      %[[VAL_21:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_20]] : i1
-// CHECK:      fir.result %[[VAL_22:.*]], %[[VAL_21]] : !fir.box<!fir.array<?xf64>>, i1
-// CHECK:    } else {
-// CHECK:      %[[VAL_23:.*]] = fir.absent !fir.box<!fir.array<?xf64>>
-// CHECK:      fir.result %[[VAL_23]], %[[VAL_1]] : !fir.box<!fir.array<?xf64>>, i1
-// CHECK:    }
+// CHECK-SAME:                                     %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                                     %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>,
+// CHECK-SAME:                                     %[[VAL_2:.*]]: i1) {
+// CHECK:           %[[VAL_3:.*]]:2 = fir.if %[[VAL_2]] -> (!fir.box<!fir.array<?xf64>>, i1) {
+// CHECK:             %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+// CHECK:             %[[VAL_5:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_4]]) : (!fir.box<none>) -> i1
+// CHECK:             %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.box<!fir.array<?xf64>>) {
+// CHECK:               fir.result %[[VAL_0]] : !fir.box<!fir.array<?xf64>>
+// CHECK:             } else {
+// CHECK:               %[[VAL_10:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:               %[[VAL_11:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+// CHECK:               %[[VAL_13:.*]] = fir.call @_FortranACopyInAssign(%[[VAL_10]], %[[VAL_11]],
+// CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK:               %[[VAL_15:.*]] = fir.rebox %[[VAL_14]] : (!fir.box<!fir.heap<!fir.array<?xf64>>>) -> !fir.box<!fir.array<?xf64>>
+// CHECK:               fir.result %[[VAL_15]] : !fir.box<!fir.array<?xf64>>
+// CHECK:             }
+// CHECK:             %[[VAL_16:.*]] = arith.constant false
+// CHECK:             %[[VAL_17:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_16]] : i1
+// CHECK:             fir.result %[[VAL_6]], %[[VAL_17]] : !fir.box<!fir.array<?xf64>>, i1
+// CHECK:           } else {
+// CHECK:             %[[VAL_18:.*]] = fir.absent !fir.box<!fir.array<?xf64>>
+// CHECK:             fir.result %[[VAL_18]], %[[VAL_2]] : !fir.box<!fir.array<?xf64>>, i1
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
 
-func.func @test_copy_out_no_copy_back(%temp: !fir.box<!fir.array<?xf64>>, %was_copied: i1) {
-  hlfir.copy_out %temp, %was_copied : (!fir.box<!fir.array<?xf64>>, i1) -> ()
+func.func @test_copy_out_no_copy_back(%temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %was_copied: i1) {
+  hlfir.copy_out %temp, %was_copied : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
   return
 }
 // CHECK-LABEL:   func.func @test_copy_out_no_copy_back(
-// CHECK-SAME:    %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>,
-// CHECK-SAME:    %[[VAL_1:.*]]: i1) {
-// CHECK-NEXT:    fir.if %[[VAL_1]] {
-// CHECK-NEXT:      %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK-NEXT:      %[[VAL_3:.*]] = fir.call @_FortranADestroyWithoutFinalization(%[[VAL_2]]) : (!fir.box<none>) -> none
-// CHECK-NEXT:      %[[VAL_4:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
-// CHECK-NEXT:      fir.freemem %[[VAL_4]] : !fir.heap<!fir.array<?xf64>>
-// CHECK-NEXT:    }
+// CHECK-SAME:                                          %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>,
+// CHECK-SAME:                                          %[[VAL_1:.*]]: i1) {
+// CHECK:           fir.if %[[VAL_1]] {
+// CHECK:             %[[VAL_2:.*]] = fir.zero_bits !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK:             %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:             %[[VAL_7:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:             %[[VAL_9:.*]] = fir.call @_FortranACopyOutAssign(%[[VAL_6]], %[[VAL_7]],
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
 
-func.func @test_copy_out_copy_back(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.box<!fir.array<?xf64>>, %was_copied: i1) {
-  hlfir.copy_out %temp, %was_copied to %box : (!fir.box<!fir.array<?xf64>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+func.func @test_copy_out_copy_back(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %was_copied: i1) {
+  hlfir.copy_out %temp, %was_copied to %box : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
   return
 }
 // CHECK-LABEL:   func.func @test_copy_out_copy_back(
-// CHECK-SAME:    %[[VAL_0:[^:]*]]: !fir.box<!fir.array<?xf64>>,
-// CHECK-SAME:    %[[VAL_1:.*]]: !fir.box<!fir.array<?xf64>>,
-// CHECK-SAME:    %[[VAL_2:.*]]: i1) {
-// CHECK:    %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
-// CHECK:    fir.if %[[VAL_2]] {
-// CHECK:      fir.store %[[VAL_0]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
-// CHECK:      %[[VAL_7:.*]] = arith.constant true
-// CHECK:      %[[VAL_8:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> !fir.ref<!fir.box<none>>
-// CHECK:      %[[VAL_9:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK:      %[[VAL_11:.*]] = fir.call @_FortranACopyOutAssign(%[[VAL_8]], %[[VAL_9]], %[[VAL_7]],
-// CHECK:      %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
-// CHECK:      %[[VAL_13:.*]] = fir.call @_FortranADestroyWithoutFinalization(%[[VAL_12]]) : (!fir.box<none>) -> none
-// CHECK:      %[[VAL_14:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
-// CHECK:      fir.freemem %[[VAL_14]] : !fir.heap<!fir.array<?xf64>>
-// CHECK:    }
+// CHECK-SAME:                                       %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                                       %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>,
+// CHECK-SAME:                                       %[[VAL_2:.*]]: i1) {
+// CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
+// CHECK:           fir.if %[[VAL_2]] {
+// CHECK:             fir.store %[[VAL_0]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+// CHECK:             %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:             %[[VAL_10:.*]] = fir.call @_FortranACopyOutAssign(%[[VAL_7]], %[[VAL_8]],
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
 
-func.func @test_copy_in_poly(%poly : !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) {
-  %0:2 = hlfir.copy_in %poly : (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) -> (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>, i1)
+func.func @test_copy_in_poly(%poly : !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>, %temp: !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>) {
+  %0:2 = hlfir.copy_in %poly to %temp : (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>) -> (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>, i1)
   return
 }
 // CHECK-LABEL:   func.func @test_copy_in_poly(
-// CHECK-SAME:                                 %[[VAL_0:.*]]: !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) {
-// CHECK:           %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>
+// CHECK-SAME:                                 %[[VAL_0:.*]]: !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>,
+// CHECK-SAME:                                 %[[VAL_1:.*]]: !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>) {
 // CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) -> !fir.box<none>
 // CHECK:           %[[VAL_3:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_2]]) : (!fir.box<none>) -> i1
 // CHECK:           %[[VAL_4:.*]] = fir.if %[[VAL_3]] -> (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) {
 // CHECK:             fir.result %[[VAL_0]] : !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>
 // CHECK:           } else {
-// CHECK:             %[[VAL_5:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>
-// CHECK:             %[[VAL_6:.*]] = arith.constant 0 : index
-// CHECK:             %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
-// CHECK:             %[[VAL_8:.*]] = fir.embox %[[VAL_5]](%[[VAL_7]]) : (!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>
-// CHECK:             fir.store %[[VAL_8]] to %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>
-// CHECK:             %[[VAL_9:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref<!fir.char<1,{{.*}}>>
-// CHECK:             %[[VAL_10:.*]] = arith.constant {{.*}} : index
-// CHECK:             %[[VAL_11:.*]] = arith.constant {{.*}} : i32
-// CHECK:             %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK:             %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) -> !fir.box<none>
-// CHECK:             %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:             %[[VAL_15:.*]] = fir.call @_FortranAAssignTemporary(%[[VAL_12]], %[[VAL_13]], %[[VAL_14]], %[[VAL_11]]) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
-// CHECK:             %[[VAL_16:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>
-// CHECK:             %[[VAL_17:.*]] = fir.rebox %[[VAL_16]] : (!fir.box<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>) -> !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>
-// CHECK:             fir.result %[[VAL_17]] : !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>
+// CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:             %[[VAL_9:.*]] = fir.convert %[[VAL_0]] : (!fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>) -> !fir.box<none>
+// CHECK:             %[[VAL_11:.*]] = fir.call @_FortranACopyInAssign(%[[VAL_8]], %[[VAL_9]],
+// CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>>
+// CHECK:             %[[VAL_13:.*]] = fir.rebox %[[VAL_12]] : (!fir.class<!fir.heap<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>>) -> !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>
+// CHECK:             fir.result %[[VAL_13]] : !fir.class<!fir.array<?x!fir.type<test_copy_in_polyTt1{i:i32}>>>
 // CHECK:           }
-// CHECK:           %[[VAL_18:.*]] = arith.constant false
-// CHECK:           %[[VAL_19:.*]] = arith.cmpi eq, %[[VAL_3]], %[[VAL_18]] : i1
+// CHECK:           %[[VAL_14:.*]] = arith.constant false
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_3]], %[[VAL_14]] : i1
 // CHECK:           return
 // CHECK:         }

diff  --git a/flang/test/HLFIR/copy-in-out.fir b/flang/test/HLFIR/copy-in-out.fir
index be24dcc975fe6..2db0c89c7e44b 100644
--- a/flang/test/HLFIR/copy-in-out.fir
+++ b/flang/test/HLFIR/copy-in-out.fir
@@ -3,25 +3,26 @@
 
 // RUN: fir-opt %s | fir-opt | FileCheck %s
 
-func.func @test_copy_in(%box: !fir.box<!fir.array<?xf64>>, %is_present: i1) {
-  %0:2 = hlfir.copy_in %box : (!fir.box<!fir.array<?xf64>>) -> (!fir.box<!fir.array<?xf64>>, i1)
-  %1:2 = hlfir.copy_in %box handle_optional %is_present : (!fir.box<!fir.array<?xf64>>, i1) -> (!fir.box<!fir.array<?xf64>>, i1)
+func.func @test_copy_in(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %is_present: i1) {
+  %0:2 = hlfir.copy_in %box to %temp : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+  %1:2 = hlfir.copy_in %box to %temp handle_optional %is_present : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> (!fir.box<!fir.array<?xf64>>, i1)
   return
 }
 // CHECK-LABEL:   func.func @test_copy_in(
 // CHECK-SAME:    %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>>,
-// CHECK-SAME:    %[[VAL_1:.*]]: i1
-// CHECK:  %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_0]]  : (!fir.box<!fir.array<?xf64>>) -> (!fir.box<!fir.array<?xf64>>, i1)
-// CHECK:  %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_0]] handle_optional %[[VAL_1]] : (!fir.box<!fir.array<?xf64>>, i1) -> (!fir.box<!fir.array<?xf64>>, i1)
+// CHECK-SAME:    %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>,
+// CHECK-SAME:    %[[VAL_2:.*]]: i1
+// CHECK:  hlfir.copy_in %[[VAL_0]] to %[[VAL_1]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+// CHECK:  hlfir.copy_in %[[VAL_0]] to %[[VAL_1]] handle_optional %[[VAL_2]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> (!fir.box<!fir.array<?xf64>>, i1)
 
-func.func @test_copy_out(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.box<!fir.array<?xf64>>, %was_copied: i1) {
-  hlfir.copy_out %temp, %was_copied : (!fir.box<!fir.array<?xf64>>, i1) -> ()
-  hlfir.copy_out %temp, %was_copied to %box : (!fir.box<!fir.array<?xf64>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+func.func @test_copy_out(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %was_copied: i1) {
+  hlfir.copy_out %temp, %was_copied : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
+  hlfir.copy_out %temp, %was_copied to %box : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
   return
 }
 // CHECK-LABEL:   func.func @test_copy_out(
 // CHECK-SAME:    %[[VAL_0:[^:]*]]: !fir.box<!fir.array<?xf64>>,
-// CHECK-SAME:    %[[VAL_1:.*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:    %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>,
 // CHECK-SAME:    %[[VAL_2:.*]]: i1) {
-// CHECK:  hlfir.copy_out %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.array<?xf64>>, i1) -> ()
-// CHECK:  hlfir.copy_out %[[VAL_1]], %[[VAL_2]] to %[[VAL_0]] : (!fir.box<!fir.array<?xf64>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+// CHECK:  hlfir.copy_out %[[VAL_1]], %[[VAL_2]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
+// CHECK:  hlfir.copy_out %[[VAL_1]], %[[VAL_2]] to %[[VAL_0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()

diff  --git a/flang/test/HLFIR/memory-effects.fir b/flang/test/HLFIR/memory-effects.fir
index 32d6b264d6c11..66659829284fe 100644
--- a/flang/test/HLFIR/memory-effects.fir
+++ b/flang/test/HLFIR/memory-effects.fir
@@ -242,22 +242,23 @@ func.func @char_extremum(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir.
   return
 }
 
-func.func @copy_in(%box: !fir.box<!fir.array<?xf64>>, %is_present: i1) {
-// expected-remark at +2 {{found an instance of 'allocate' on resource '<Default>'}}
-// expected-remark at +1 {{found an instance of 'read' on a value, on resource '<Default>'}}
-  %0:2 = hlfir.copy_in %box : (!fir.box<!fir.array<?xf64>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+func.func @copy_in(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %is_present: i1) {
+// expected-remark at +3 {{found an instance of 'allocate' on resource '<Default>'}}
+// expected-remark at +2 {{found an instance of 'read' on a value, on resource '<Default>'}}
+// expected-remark at +1 {{found an instance of 'write' on a value, on resource '<Default>'}}
+  %0:2 = hlfir.copy_in %box to %temp : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
 // expected-remark at +1 {{operation has no memory effects}}
   return
 }
 
-func.func @copy_out(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.box<!fir.array<?xf64>>, %was_copied: i1) {
+func.func @copy_out(%box: !fir.box<!fir.array<?xf64>>, %temp: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %was_copied: i1) {
 // expected-remark at +2 {{found an instance of 'free' on resource '<Default>'}}
 // expected-remark at +1 {{found an instance of 'read' on a value, on resource '<Default>'}}
-  hlfir.copy_out %temp, %was_copied : (!fir.box<!fir.array<?xf64>>, i1) -> ()
+  hlfir.copy_out %temp, %was_copied : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
 // expected-remark at +3 {{found an instance of 'free' on resource '<Default>'}}
 // expected-remark at +2 {{found an instance of 'read' on a value, on resource '<Default>'}}
 // expected-remark at +1 {{found an instance of 'write' on a value, on resource '<Default>'}}
-  hlfir.copy_out %temp, %was_copied to %box : (!fir.box<!fir.array<?xf64>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+  hlfir.copy_out %temp, %was_copied to %box : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
 // expected-remark at +1 {{operation has no memory effects}}
   return
 }

diff  --git a/flang/test/Lower/HLFIR/call-sequence-associated-descriptors.f90 b/flang/test/Lower/HLFIR/call-sequence-associated-descriptors.f90
index c363ab0df2fe2..ccbc1df96a73a 100644
--- a/flang/test/Lower/HLFIR/call-sequence-associated-descriptors.f90
+++ b/flang/test/Lower/HLFIR/call-sequence-associated-descriptors.f90
@@ -58,7 +58,7 @@ subroutine test_char_copy_in_copy_out(x)
 ! CHECK-LABEL:   func.func @_QMbindc_seq_assocPtest_char_copy_in_copy_out(
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %{{[0-9]+}} {uniq_name = "_QMbindc_seq_assocFtest_char_copy_in_copy_outEx"} : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.box<!fir.array<?x?x!fir.char<1,?>>>)
 ! CHECK:           %[[VAL_2:.*]] = arith.constant 100 : i32
-! CHECK:           %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i1)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,?>>>>>) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i1)
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
 ! CHECK:           %[[VAL_5:.*]] = fir.shift %[[VAL_4]], %[[VAL_4]] : (index, index) -> !fir.shift<2>
 ! CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_3]]#0(%[[VAL_5]]) : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.shift<2>) -> !fir.box<!fir.array<?x?x!fir.char<1,?>>>
@@ -81,7 +81,7 @@ subroutine test_char_copy_in_copy_out(x)
 ! CHECK:           %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:           %[[VAL_24:.*]] = fir.embox %[[VAL_23]](%[[VAL_20]]) typeparams %[[VAL_21]] : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shapeshift<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:           fir.call @takes_char(%[[VAL_24]], %[[VAL_7]]#1) fastmath<contract> {is_bind_c} : (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<i32>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_3]]#0, %[[VAL_3]]#1 to %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i1, !fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_3]]#1 to %[[VAL_1]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,?>>>>>, i1, !fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> ()
 ! CHECK:           hlfir.end_associate %[[VAL_7]]#1, %[[VAL_7]]#2 : !fir.ref<i32>, i1
 ! CHECK:           return
 ! CHECK:         }
@@ -92,7 +92,7 @@ subroutine test_char_assumed_size(x)
   end subroutine
 ! CHECK-LABEL:   func.func @_QMbindc_seq_assocPtest_char_assumed_size(
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %{{[0-9]+}} {uniq_name = "_QMbindc_seq_assocFtest_char_assumed_sizeEx"} : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.box<!fir.array<?x?x!fir.char<1,?>>>)
-! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i1)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,?>>>>>) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i1)
 ! CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
 ! CHECK:           %[[VAL_4:.*]] = fir.shift %[[VAL_3]], %[[VAL_3]] : (index, index) -> !fir.shift<2>
 ! CHECK:           %[[VAL_5:.*]] = fir.rebox %[[VAL_2]]#0(%[[VAL_4]]) : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.shift<2>) -> !fir.box<!fir.array<?x?x!fir.char<1,?>>>
@@ -114,7 +114,7 @@ subroutine test_char_assumed_size(x)
 ! CHECK:           %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.array<10x?x!fir.char<1,?>>>
 ! CHECK:           %[[VAL_22:.*]] = fir.embox %[[VAL_21]](%[[VAL_18]]) typeparams %[[VAL_19]] : (!fir.ref<!fir.array<10x?x!fir.char<1,?>>>, !fir.shapeshift<2>, index) -> !fir.box<!fir.array<10x?x!fir.char<1,?>>>
 ! CHECK:           fir.call @takes_char_assumed_size(%[[VAL_22]]) fastmath<contract> {is_bind_c} : (!fir.box<!fir.array<10x?x!fir.char<1,?>>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_2]]#0, %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i1, !fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,?>>>>>, i1, !fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> ()
 ! CHECK:           return
 ! CHECK:         }
 
@@ -216,7 +216,7 @@ subroutine test_poly_copy_in_copy_out(x)
 ! CHECK-LABEL:   func.func @_QMpoly_seq_assocPtest_poly_copy_in_copy_out(
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %{{[0-9]+}} {uniq_name = "_QMpoly_seq_assocFtest_poly_copy_in_copy_outEx"} : (!fir.class<!fir.array<?x?xnone>>, !fir.dscope) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
 ! CHECK:           %[[VAL_2:.*]] = arith.constant 100 : i32
-! CHECK:           %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.class<!fir.array<?x?xnone>>) -> (!fir.class<!fir.array<?x?xnone>>, i1)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.class<!fir.array<?x?xnone>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> (!fir.class<!fir.array<?x?xnone>>, i1)
 ! CHECK:           %[[VAL_4:.*]]:3 = hlfir.associate %[[VAL_2]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
 ! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]]#1 {uniq_name = "_QMpoly_seq_assocFtakes_polyEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
@@ -234,7 +234,7 @@ subroutine test_poly_copy_in_copy_out(x)
 ! CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (!fir.ref<!fir.array<?x?xnone>>) -> !fir.ref<!fir.array<?xnone>>
 ! CHECK:           %[[VAL_19:.*]] = fir.embox %[[VAL_18]](%[[VAL_16]]) source_box %[[VAL_3]]#0 : (!fir.ref<!fir.array<?xnone>>, !fir.shape<1>, !fir.class<!fir.array<?x?xnone>>) -> !fir.class<!fir.array<?xnone>>
 ! CHECK:           fir.call @_QPtakes_poly(%[[VAL_19]], %[[VAL_4]]#1) fastmath<contract> : (!fir.class<!fir.array<?xnone>>, !fir.ref<i32>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_3]]#0, %[[VAL_3]]#1 to %[[VAL_1]]#0 : (!fir.class<!fir.array<?x?xnone>>, i1, !fir.class<!fir.array<?x?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_3]]#1 to %[[VAL_1]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>, i1, !fir.class<!fir.array<?x?xnone>>) -> ()
 ! CHECK:           hlfir.end_associate %[[VAL_4]]#1, %[[VAL_4]]#2 : !fir.ref<i32>, i1
 ! CHECK:           return
 ! CHECK:         }
@@ -245,7 +245,7 @@ subroutine test_poly_assumed_size(x)
   end subroutine
 ! CHECK-LABEL:   func.func @_QMpoly_seq_assocPtest_poly_assumed_size(
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %{{[0-9]+}} {uniq_name = "_QMpoly_seq_assocFtest_poly_assumed_sizeEx"} : (!fir.class<!fir.array<?x?xnone>>, !fir.dscope) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
-! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.class<!fir.array<?x?xnone>>) -> (!fir.class<!fir.array<?x?xnone>>, i1)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.class<!fir.array<?x?xnone>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> (!fir.class<!fir.array<?x?xnone>>, i1)
 ! CHECK:           %[[VAL_3:.*]] = arith.constant 10 : i64
 ! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i64
 ! CHECK:           %[[VAL_5:.*]] = arith.subi %[[VAL_3]], %[[VAL_4]] : i64
@@ -262,7 +262,7 @@ subroutine test_poly_assumed_size(x)
 ! CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.ref<!fir.array<?x?xnone>>) -> !fir.ref<!fir.array<10x?xnone>>
 ! CHECK:           %[[VAL_17:.*]] = fir.embox %[[VAL_16]](%[[VAL_14]]) source_box %[[VAL_2]]#0 : (!fir.ref<!fir.array<10x?xnone>>, !fir.shape<2>, !fir.class<!fir.array<?x?xnone>>) -> !fir.class<!fir.array<10x?xnone>>
 ! CHECK:           fir.call @_QPtakes_poly_assumed_size(%[[VAL_17]]) fastmath<contract> : (!fir.class<!fir.array<10x?xnone>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_2]]#0, %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.class<!fir.array<?x?xnone>>, i1, !fir.class<!fir.array<?x?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>, i1, !fir.class<!fir.array<?x?xnone>>) -> ()
 ! CHECK:           return
 ! CHECK:         }
 

diff  --git a/flang/test/Lower/HLFIR/calls-assumed-shape.f90 b/flang/test/Lower/HLFIR/calls-assumed-shape.f90
index cfe607a69102e..ee8eda5be6a68 100644
--- a/flang/test/Lower/HLFIR/calls-assumed-shape.f90
+++ b/flang/test/Lower/HLFIR/calls-assumed-shape.f90
@@ -42,10 +42,10 @@ subroutine takes_contiguous_assumed(x)
 ! CHECK-LABEL: func.func @_QPtest_ptr_to_contiguous_assumed(
 ! CHECK:  %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_ptr_to_contiguous_assumedEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
 ! CHECK:  %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
-! CHECK:  %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>) -> (!fir.box<!fir.ptr<!fir.array<?xf32>>>, i1)
+! CHECK:  %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_2]] to %[[TMP_BOX:.*]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.box<!fir.ptr<!fir.array<?xf32>>>, i1)
 ! CHECK:  %[[VAL_4:.*]] = fir.rebox %[[VAL_3]]#0 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>) -> !fir.box<!fir.array<?xf32>>
 ! CHECK:  fir.call @_QPtakes_contiguous_assumed(%[[VAL_4]]) {{.*}} : (!fir.box<!fir.array<?xf32>>) -> ()
-! CHECK:  hlfir.copy_out %[[VAL_3]]#0, %[[VAL_3]]#1 to %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, i1, !fir.box<!fir.ptr<!fir.array<?xf32>>>) -> ()
+! CHECK:  hlfir.copy_out %[[TMP_BOX]], %[[VAL_3]]#1 to %[[VAL_2]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, i1, !fir.box<!fir.ptr<!fir.array<?xf32>>>) -> ()
 
 subroutine test_ptr_to_contiguous_assumed_classstar(p)
   interface
@@ -59,10 +59,10 @@ subroutine takes_contiguous_assumed_classstar(x)
 ! CHECK-LABEL: func.func @_QPtest_ptr_to_contiguous_assumed_classstar(
 ! CHECK:  %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_ptr_to_contiguous_assumed_classstarEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
 ! CHECK:  %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
-! CHECK:  %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>) -> (!fir.box<!fir.ptr<!fir.array<?xf32>>>, i1)
+! CHECK:  %[[VAL_3:.*]]:2 = hlfir.copy_in %[[VAL_2]] to %[[TMP_BOX:.*]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.box<!fir.ptr<!fir.array<?xf32>>>, i1)
 ! CHECK:  %[[VAL_4:.*]] = fir.rebox %[[VAL_3]]#0 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>) -> !fir.class<!fir.array<?xnone>>
 ! CHECK:  fir.call @_QPtakes_contiguous_assumed_classstar(%[[VAL_4]]) {{.*}} : (!fir.class<!fir.array<?xnone>>) -> ()
-! CHECK:  hlfir.copy_out %[[VAL_3]]#0, %[[VAL_3]]#1 to %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, i1, !fir.box<!fir.ptr<!fir.array<?xf32>>>) -> ()
+! CHECK:  hlfir.copy_out %[[TMP_BOX]], %[[VAL_3]]#1 to %[[VAL_2]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, i1, !fir.box<!fir.ptr<!fir.array<?xf32>>>) -> ()
 
 subroutine test_ptr_to_assumed_typestar(p)
   interface

diff  --git a/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90 b/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
index 7c8faf4fca8f5..61e7ef959d33f 100644
--- a/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
+++ b/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
@@ -37,10 +37,10 @@ end subroutine sub
 ! CHECK:           %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_16]], %[[VAL_13]] : index
 ! CHECK:           %[[VAL_19:.*]] = fir.shape %[[VAL_18]] : (index) -> !fir.shape<1>
 ! CHECK:           %[[VAL_20:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_11]]:%[[VAL_8]]:%[[VAL_12]])  shape %[[VAL_19]] : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
-! CHECK:           %[[VAL_21:.*]]:2 = hlfir.copy_in %[[VAL_20]] : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, i1)
+! CHECK:           %[[VAL_21:.*]]:2 = hlfir.copy_in %[[VAL_20]] to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.box<!fir.array<?xi32>>, i1)
 ! CHECK:           %[[VAL_22:.*]] = fir.box_addr %[[VAL_21]]#0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 ! CHECK:           fir.call @_QPsub2(%[[VAL_22]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_21]]#0, %[[VAL_21]]#1 to %[[VAL_20]] : (!fir.box<!fir.array<?xi32>>, i1, !fir.box<!fir.array<?xi32>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_21]]#1 to %[[VAL_20]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i1, !fir.box<!fir.array<?xi32>>) -> ()
 ! CHECK:           return
 ! CHECK:         }
 

diff  --git a/flang/test/Lower/HLFIR/calls-optional.f90 b/flang/test/Lower/HLFIR/calls-optional.f90
index 1ada5b198aed2..69a6e7fafff27 100644
--- a/flang/test/Lower/HLFIR/calls-optional.f90
+++ b/flang/test/Lower/HLFIR/calls-optional.f90
@@ -16,19 +16,18 @@ subroutine takes_optional_explicit(x)
 ! CHECK-LABEL: func.func @_QPoptional_copy_in_out(
 ! CHECK:  %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFoptional_copy_in_outEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
 ! CHECK:  %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#0 : (!fir.box<!fir.array<?xf32>>) -> i1
-! CHECK:  %[[VAL_3:.*]]:4 = fir.if %[[VAL_2]] -> (!fir.ref<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>) {
-! CHECK:    %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, i1)
+! CHECK:  %[[VAL_3:.*]]:3 = fir.if %[[VAL_2]] -> (!fir.ref<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>) {
+! CHECK:    %[[VAL_4:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.box<!fir.array<?xf32>>, i1)
 ! CHECK:    %[[VAL_5:.*]] = fir.box_addr %[[VAL_4]]#0 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
-! CHECK:    fir.result %[[VAL_5]], %[[VAL_4]]#0, %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.ref<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>
+! CHECK:    fir.result %[[VAL_5]], %[[VAL_4]]#1, %[[VAL_1]]#0 : !fir.ref<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>
 ! CHECK:  } else {
-! CHECK:    %[[VAL_6:.*]] = fir.absent !fir.ref<!fir.array<?xf32>>
-! CHECK:    %[[VAL_7:.*]] = fir.absent !fir.box<!fir.array<?xf32>>
+! CHECK:    %[[VAL_7:.*]] = fir.absent !fir.ref<!fir.array<?xf32>>
 ! CHECK:    %[[VAL_8:.*]] = arith.constant false
 ! CHECK:    %[[VAL_9:.*]] = fir.absent !fir.box<!fir.array<?xf32>>
-! CHECK:    fir.result %[[VAL_6]], %[[VAL_7]], %[[VAL_8]], %[[VAL_9]] : !fir.ref<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>
+! CHECK:    fir.result %[[VAL_7]], %[[VAL_8]], %[[VAL_9]] : !fir.ref<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  fir.call @_QPtakes_optional_explicit(%[[VAL_3]]#0) {{.*}} : (!fir.ref<!fir.array<?xf32>>) -> ()
-! CHECK:  hlfir.copy_out %[[VAL_3]]#1, %[[VAL_3]]#2 to %[[VAL_3]]#3 : (!fir.box<!fir.array<?xf32>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
+! CHECK:  hlfir.copy_out %[[TMP_BOX]], %[[VAL_3]]#1 to %[[VAL_3]]#2 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
 
 subroutine optional_value_copy(x)
   interface

diff  --git a/flang/test/Lower/HLFIR/calls-poly-to-assumed-type.f90 b/flang/test/Lower/HLFIR/calls-poly-to-assumed-type.f90
index 05885e729f93f..d607e7422a31f 100644
--- a/flang/test/Lower/HLFIR/calls-poly-to-assumed-type.f90
+++ b/flang/test/Lower/HLFIR/calls-poly-to-assumed-type.f90
@@ -13,8 +13,8 @@ subroutine assumed_type_assumed_size(x)
 end subroutine
 ! CHECK-LABEL:   func.func @_QPpass_poly_to_assumed_type_assumed_size(
 ! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFpass_poly_to_assumed_type_assumed_sizeEx"} : (!fir.class<!fir.array<?x?xnone>>, !fir.dscope) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
-! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 : (!fir.class<!fir.array<?x?xnone>>) -> (!fir.class<!fir.array<?x?xnone>>, i1)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.copy_in %[[VAL_1]]#0 to %[[TMP_BOX:.*]] : (!fir.class<!fir.array<?x?xnone>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> (!fir.class<!fir.array<?x?xnone>>, i1)
 ! CHECK:           %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.class<!fir.array<?x?xnone>>) -> !fir.ref<!fir.array<?x?xnone>>
 ! CHECK:           %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<!fir.array<?x?xnone>>) -> !fir.ref<!fir.array<?xnone>>
 ! CHECK:           fir.call @_QPassumed_type_assumed_size(%[[VAL_4]]) fastmath<contract> : (!fir.ref<!fir.array<?xnone>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_2]]#0, %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.class<!fir.array<?x?xnone>>, i1, !fir.class<!fir.array<?x?xnone>>) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_1]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>, i1, !fir.class<!fir.array<?x?xnone>>) -> ()

diff  --git a/flang/test/Lower/HLFIR/poly_expr_for_nonpoly_dummy.f90 b/flang/test/Lower/HLFIR/poly_expr_for_nonpoly_dummy.f90
index f5ec7c35594bd..3f97a9f848d43 100644
--- a/flang/test/Lower/HLFIR/poly_expr_for_nonpoly_dummy.f90
+++ b/flang/test/Lower/HLFIR/poly_expr_for_nonpoly_dummy.f90
@@ -25,9 +25,9 @@ end subroutine test1
 ! CHECK:           %[[VAL_26:.*]] = fir.shape %[[VAL_25]]#1 : (index) -> !fir.shape<1>
 ! CHECK:           %[[VAL_27:.*]]:3 = hlfir.associate %[[VAL_23]](%[[VAL_26]]) {adapt.valuebyref} : (!hlfir.expr<?x!fir.type<_QMtypesTt>?>, !fir.shape<1>) -> (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, i1)
 ! CHECK:           %[[VAL_28:.*]] = fir.rebox %[[VAL_27]]#0 : (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>) -> !fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>
-! CHECK:           %[[VAL_29:.*]]:2 = hlfir.copy_in %[[VAL_28]] : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>) -> (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, i1)
+! CHECK:           %[[VAL_29:.*]]:2 = hlfir.copy_in %[[VAL_28]] to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>) -> (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, i1)
 ! CHECK:           fir.call @_QMtypesPcallee(%[[VAL_29]]#0) fastmath<contract> : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_29]]#0, %[[VAL_29]]#1 : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, i1) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_29]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, i1) -> ()
 ! CHECK:           hlfir.end_associate %[[VAL_27]]#0, %[[VAL_27]]#2 : !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, i1
 ! CHECK:           hlfir.destroy %[[VAL_23]] : !hlfir.expr<?x!fir.type<_QMtypesTt>?>
 
@@ -40,8 +40,8 @@ end subroutine test2
 ! CHECK:           %[[VAL_5:.*]] = hlfir.elemental %{{.*}} mold %{{.*}} unordered : (!fir.shape<1>, !fir.class<!fir.array<?x!fir.type<_QMtypesTt>>>) -> !hlfir.expr<?x!fir.type<_QMtypesTt>?> {
 ! CHECK:           %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_5]](%{{.*}}) {adapt.valuebyref} : (!hlfir.expr<?x!fir.type<_QMtypesTt>?>, !fir.shape<1>) -> (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, i1)
 ! CHECK:           %[[VAL_10:.*]] = fir.rebox %[[VAL_9]]#0 : (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>) -> !fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>
-! CHECK:           %[[VAL_11:.*]]:2 = hlfir.copy_in %[[VAL_10]] : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>) -> (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, i1)
+! CHECK:           %[[VAL_11:.*]]:2 = hlfir.copy_in %[[VAL_10]] to %[[TMP_BOX:.*]] : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>) -> (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, i1)
 ! CHECK:           fir.call @_QMtypesPcallee(%[[VAL_11]]#0) fastmath<contract> : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>) -> ()
-! CHECK:           hlfir.copy_out %[[VAL_11]]#0, %[[VAL_11]]#1 : (!fir.box<!fir.array<?x!fir.type<_QMtypesTt>>>, i1) -> ()
+! CHECK:           hlfir.copy_out %[[TMP_BOX]], %[[VAL_11]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, i1) -> ()
 ! CHECK:           hlfir.end_associate %[[VAL_9]]#0, %[[VAL_9]]#2 : !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, i1
 ! CHECK:           hlfir.destroy %[[VAL_5]] : !hlfir.expr<?x!fir.type<_QMtypesTt>?>

diff  --git a/flang/test/Lower/call-copy-in-out.f90 b/flang/test/Lower/call-copy-in-out.f90
index 304eb083e2704..253db7f05a6b6 100644
--- a/flang/test/Lower/call-copy-in-out.f90
+++ b/flang/test/Lower/call-copy-in-out.f90
@@ -33,12 +33,12 @@ subroutine test_assumed_shape_to_array(x)
 ! Copy-out
 ! CHECK-DAG:  %[[shape:.*]] = fir.shape %[[dim]]#1 : (index) -> !fir.shape<1>
 ! CHECK-DAG:  %[[temp_box:.*]] = fir.embox %[[addr]](%[[shape]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+! CHECK:  %[[rebox:.*]] = fir.rebox %[[temp_box]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+! CHECK:  fir.store %[[rebox]] to %[[temp_box_ref:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
 ! CHECK-DAG:  fir.store %[[x]] to %[[arg_box_loc:.*]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
-! CHECK-DAG:  %[[skipToInit:.*]] = arith.constant true
 ! CHECK-DAG: %[[arg_box_addr:.*]] = fir.convert %[[arg_box_loc]] : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<none>>
-! CHECK-DAG: %[[temp_box_cast:.*]] = fir.convert %[[temp_box]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
-! CHECK-DAG: fir.call @_FortranACopyOutAssign(%[[arg_box_addr]], %[[temp_box_cast]], %[[skipToInit]], %{{.*}}, %{{.*}}){{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, i1, !fir.ref<i8>, i32) -> none
-! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
+! CHECK-DAG: %[[temp_box_cast:.*]] = fir.convert %[[temp_box_ref]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK-DAG: fir.call @_FortranACopyOutAssign(%[[arg_box_addr]], %[[temp_box_cast]], %{{.*}}, %{{.*}}){{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, !fir.ref<i8>, i32) -> none
 
   call bar(x)
 end subroutine
@@ -69,7 +69,6 @@ subroutine eval_expr_only_once(x)
 ! CHECK:  fir.call @_FortranACopyOutAssign
 ! CHECK-NOT: fir.call @_QPonly_once()
 
-! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 end subroutine
 
 ! Test no copy-in/copy-out is generated for contiguous assumed shapes.
@@ -97,7 +96,6 @@ subroutine test_parenthesis(x)
 ! CHECK:  fir.call @_QPbar(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<?xf32>>) -> ()
   call bar((x))
 ! CHECK-NOT:  fir.call @_FortranACopyOutAssign
-! CHECK: fir.freemem %[[temp]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
 
@@ -125,7 +123,6 @@ subroutine bar_intent_out(x)
   
 ! CHECK: fir.if %[[not_contiguous]]
 ! CHECK: fir.call @_FortranACopyOutAssign
-! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
 
@@ -155,8 +152,8 @@ subroutine bar_intent_in(x)
 ! CHECK:  fir.call @_QPbar_intent_in(%[[cast]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
   call bar_intent_in(x)
 ! CHECK: fir.if %[[not_contiguous]]
-! CHECK-NOT:  fir.call @_FortranACopyOutAssign
-! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
+! CHECK: fir.zero
+! CHECK:  fir.call @_FortranACopyOutAssign
 ! CHECK: return
 end subroutine
 
@@ -183,7 +180,6 @@ subroutine bar_intent_inout(x)
   call bar_intent_inout(x)
 ! CHECK: fir.if %[[not_contiguous]]
 ! CHECK:  fir.call @_FortranACopyOutAssign
-! CHECK: fir.freemem %[[addr]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK: return
 end subroutine
 
@@ -221,12 +217,12 @@ subroutine test_char(x)
 ! CHECK:           fir.if %[[VAL_22]] {
 ! CHECK:             %[[VAL_26:.*]] = fir.shape %[[VAL_20]]#1 : (index) -> !fir.shape<1>
 ! CHECK:             %[[VAL_27:.*]] = fir.embox %[[VAL_24]](%[[VAL_26]]) : (!fir.heap<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,10>>>
+! CHECK:             %[[REBOX:.*]] = fir.rebox %[[VAL_27]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>
+! CHECK:             fir.store %[[REBOX]] to %[[TMP_BOX_REF:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
 ! CHECK:             fir.store %[[VAL_0]] to %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?x!fir.char<1,10>>>>
-! CHECK:             %[[VAL_30:.*]] = arith.constant true
 ! CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.array<?x!fir.char<1,10>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> !fir.box<none>
-! CHECK:             %[[VAL_34:.*]] = fir.call @_FortranACopyOutAssign(%[[VAL_31]], %[[VAL_32]], %[[VAL_30]], %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i1, !fir.ref<i8>, i32) -> none
-! CHECK:             fir.freemem %[[VAL_24]] : !fir.heap<!fir.array<?x!fir.char<1,10>>>
+! CHECK:             %[[VAL_32:.*]] = fir.convert %[[TMP_BOX_REF]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK:             %[[VAL_34:.*]] = fir.call @_FortranACopyOutAssign(%[[VAL_31]], %[[VAL_32]], %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, !fir.ref<i8>, i32) -> none
 ! CHECK:           }
   
   character(10) :: x(:)

diff  --git a/flang/test/Lower/dummy-argument-assumed-shape-optional.f90 b/flang/test/Lower/dummy-argument-assumed-shape-optional.f90
index dfd40acf9ee10..5e52459a24309 100644
--- a/flang/test/Lower/dummy-argument-assumed-shape-optional.f90
+++ b/flang/test/Lower/dummy-argument-assumed-shape-optional.f90
@@ -41,7 +41,6 @@ subroutine test_assumed_shape_to_contiguous(x)
 ! CHECK:  fir.call @_QPtakes_contiguous(%[[VAL_25]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_23]] {
 ! CHECK:    fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_3]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
 ! CHECK:}
@@ -86,7 +85,6 @@ subroutine test_assumed_shape_opt_to_contiguous(x)
 ! CHECK:  fir.call @_QPtakes_contiguous(%[[VAL_25]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_23]] {
 ! CHECK:    fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_3]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
 ! CHECK:}
@@ -132,7 +130,6 @@ subroutine test_assumed_shape_to_contiguous_opt(x)
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_25]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_23]] {
 ! CHECK:    fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_3]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
 ! CHECK:}
@@ -192,7 +189,6 @@ subroutine test_assumed_shape_opt_to_contiguous_opt(x)
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_38]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_33]] {
 ! CHECK:    fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
 ! CHECK:}
@@ -255,7 +251,6 @@ subroutine test_pointer_to_contiguous_opt(x)
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_41]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_36]] {
 ! CHECK:    fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_11]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
 ! CHECK:}
@@ -324,7 +319,6 @@ subroutine test_pointer_opt_to_contiguous_opt(x)
 ! CHECK:  fir.call @_QPtakes_contiguous_optional(%[[VAL_41]]) {{.*}}: (!fir.box<!fir.array<?xf32>>) -> ()
 ! CHECK:  fir.if %[[VAL_36]] {
 ! CHECK:    fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_11]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:  }
 ! CHECK:  return
 ! CHECK:}

diff  --git a/flang/test/Lower/dummy-argument-optional-2.f90 b/flang/test/Lower/dummy-argument-optional-2.f90
index 1fc7a87c6f3fc..cc026132bd081 100644
--- a/flang/test/Lower/dummy-argument-optional-2.f90
+++ b/flang/test/Lower/dummy-argument-optional-2.f90
@@ -121,7 +121,6 @@ subroutine pass_pointer_array(i)
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape(%[[VAL_29]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
 ! CHECK:           fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
 
@@ -157,7 +156,6 @@ subroutine pass_pointer_array_char(c)
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_char(%[[VAL_52]]) {{.*}}: (!fir.boxchar<1>) -> ()
 ! CHECK:         fir.if %[[and]] {
 ! CHECK:           fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_9]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:         }
 ! CHECK:         return
 ! CHECK:       }
@@ -192,7 +190,6 @@ subroutine forward_pointer_array()
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape(%[[VAL_14]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
 ! CHECK:           fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
 
@@ -231,7 +228,6 @@ subroutine pass_opt_assumed_shape(x)
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape(%[[VAL_26]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
 ! CHECK:           fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_27]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
 
@@ -270,7 +266,6 @@ subroutine pass_opt_assumed_shape_char(c)
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_char(%[[VAL_50]]) {{.*}}: (!fir.boxchar<1>) -> ()
 ! CHECK:         fir.if %[[and]] {
 ! CHECK:           fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_49]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
 ! CHECK:         }
 end subroutine
 
@@ -403,8 +398,8 @@ subroutine pass_opt_assumed_shape_to_intentin(x)
 ! CHECK:         %[[VAL_24:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<100xf32>>
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_intentin(%[[VAL_24]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
-! CHECK-NOT:       fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
+! CHECK:           fir.zero
+! CHECK:           fir.call @_FortranACopyOutAssign
 ! CHECK:         }
 end subroutine
 
@@ -435,7 +430,6 @@ subroutine pass_opt_assumed_shape_to_intentout(x)
 ! CHECK:         fir.call @_QPtakes_opt_explicit_shape_intentout(%[[VAL_14]]) {{.*}}: (!fir.ref<!fir.array<100xf32>>) -> ()
 ! CHECK:         fir.if %[[and]] {
 ! CHECK:           fir.call @_FortranACopyOutAssign
-! CHECK:           fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<?xf32>>
 ! CHECK:         }
 end subroutine
 end module

diff  --git a/flang/test/Lower/optional-value-caller.f90 b/flang/test/Lower/optional-value-caller.f90
index a1e6ebf3e2182..31bf326dd1df1 100644
--- a/flang/test/Lower/optional-value-caller.f90
+++ b/flang/test/Lower/optional-value-caller.f90
@@ -298,8 +298,8 @@ subroutine test_dyn_array_from_assumed(i, n)
 ! CHECK:  %[[VAL_25:.*]] = fir.convert %[[VAL_8]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 ! CHECK:  fir.call @_QPdyn_array(%[[VAL_25]], %[[VAL_1]]) {{.*}}: (!fir.ref<!fir.array<?xi32>>, !fir.ref<i64>) -> ()
 ! CHECK:  fir.if %[[and]] {
-! CHECK-NOT: fir.call @_FortranACopyOutAssign
-! CHECK:    fir.freemem %[[VAL_8]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:    fir.zero
+! CHECK:    fir.call @_FortranACopyOutAssign
 ! CHECK:  }
 end subroutine
 
@@ -347,7 +347,8 @@ subroutine test_array_ptr(i)
 ! CHECK:         %[[VAL_29:.*]] = fir.convert %[[VAL_30:.*]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<100xi32>>
 ! CHECK:         fir.call @_QParray(%[[VAL_29]]) fastmath<contract> : (!fir.ref<!fir.array<100xi32>>) -> ()
 ! CHECK:         fir.if %[[VAL_28]] {
-! CHECK:           fir.freemem %[[VAL_30]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:           fir.zero
+! CHECK:           fir.call @_FortranACopyOutAssign
 ! CHECK:         }
 ! CHECK:         return
 ! CHECK:       }
@@ -455,7 +456,8 @@ subroutine test_char_array(c)
 ! CHECK:         %[[VAL_35:.*]] = fir.emboxchar %[[VAL_33]], %[[VAL_29]] : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
 ! CHECK:         fir.call @_QPdyn_char_array(%[[VAL_35]], %[[VAL_2]]) fastmath<contract> : (!fir.boxchar<1>, !fir.ref<i64>) -> ()
 ! CHECK:         fir.if %[[VAL_32]] {
-! CHECK:           fir.freemem %[[VAL_34]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
+! CHECK:          fir.zero
+! CHECK:          fir.call @_FortranACopyOutAssign
 ! CHECK:         }
 ! CHECK:         return
 ! CHECK:       }


        


More information about the flang-commits mailing list