[flang-commits] [flang] [flang] fix early free of allocatable function result in WHERE mask (PR #178691)

via flang-commits flang-commits at lists.llvm.org
Wed Feb 4 03:28:40 PST 2026


https://github.com/jeanPerier updated https://github.com/llvm/llvm-project/pull/178691

>From d921d1cf80ee404d474ed70d6837b9cde8577f8c Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Thu, 29 Jan 2026 07:58:39 -0800
Subject: [PATCH 1/5] [flang] fix early free of allocatable function result in
 WHERE mask

---
 flang/lib/Lower/ConvertCall.cpp               | 36 +++++++---
 .../Lower/HLFIR/function-return-as-expr.f90   |  6 +-
 flang/test/Lower/HLFIR/where.f90              |  8 +--
 flang/test/Lower/io-statement-clean-ups.f90   | 70 +++++++++++--------
 4 files changed, 75 insertions(+), 45 deletions(-)

diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 179626624822d..569347a0e1e22 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -828,7 +828,13 @@ Fortran::lower::genCallOpAndResult(
             mustFinalizeResult, callOp};
   }
 
-  if (allocatedResult) {
+  // Insert clean-up for the result.
+  // In HLFIR, this is skipped when the result does not need to be finalized
+  // because the result is moved to an expression that will deal with the
+  // finalization.
+  if (allocatedResult &&
+      (mustFinalizeResult ||
+       !converter.getLoweringOptions().getLowerToHighLevelFIR())) {
     // The result must be optionally destroyed (if it is of a derived type
     // that may need finalization or deallocation of the components).
     // For an allocatable result we have to free the memory allocated
@@ -856,7 +862,7 @@ Fortran::lower::genCallOpAndResult(
     bool resultIsFinalized = false;
     // Check if the derived-type is finalizable if it is a monomorphic
     // derived-type.
-    // For polymorphic and unlimited polymorphic enities call the runtime
+    // For polymorphic and unlimited polymorphic entities call the runtime
     // in any cases.
     if (mustFinalizeResult) {
       if (retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic()) {
@@ -887,6 +893,10 @@ Fortran::lower::genCallOpAndResult(
     return {LoweredResult{*allocatedResult}, resultIsFinalized, callOp};
   }
 
+  if (allocatedResult)
+    return {LoweredResult{*allocatedResult}, /*resultIsFinalized=*/false,
+            callOp};
+
   // subroutine call
   if (!resultType)
     return {LoweredResult{fir::ExtendedValue{mlir::Value{}}},
@@ -1973,17 +1983,25 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
   if (!resultIsFinalized) {
     hlfir::Entity resultEntity = extendedValueToHlfirEntity(
         loc, builder, result, tempResultName, /*insertBefore=*/callOp);
+    // Allocatable result must be freed, other results are stack allocated.
+    const auto *allocatable = result.getBoxOf<fir::MutableBoxValue>();
+    bool mustFree = allocatable != nullptr;
     resultEntity = loadTrivialScalar(loc, builder, resultEntity);
     if (resultEntity.isVariable()) {
       // If the result has no finalization, it can be moved into an expression.
-      // In such case, the expression should not be freed after its use since
-      // the result is stack allocated or deallocation (for allocatable results)
-      // was already inserted in genCallOpAndResult.
-      auto asExpr =
-          hlfir::AsExprOp::create(builder, loc, resultEntity,
-                                  /*mustFree=*/builder.createBool(loc, false));
-      return hlfir::EntityWithAttributes{asExpr.getResult()};
+      // In such case, the expression.
+      mlir::Value asExpr = hlfir::AsExprOp::create(
+          builder, loc, resultEntity, builder.createBool(loc, mustFree));
+      callContext.stmtCtx.attachCleanup([bldr = &builder, loc, asExpr]() {
+        hlfir::DestroyOp::create(*bldr, loc, asExpr, /*finalize=*/false);
+      });
+      return hlfir::EntityWithAttributes{asExpr};
     }
+    if (allocatable)
+      callContext.stmtCtx.attachCleanup(
+          [bldr = &builder, loc, box = *allocatable]() {
+            fir::factory::genFreememIfAllocated(*bldr, loc, box);
+          });
     return hlfir::EntityWithAttributes{resultEntity};
   }
   // If the result has finalization, it cannot be moved because use of its
diff --git a/flang/test/Lower/HLFIR/function-return-as-expr.f90 b/flang/test/Lower/HLFIR/function-return-as-expr.f90
index 3b43b6e261db1..332b0aea9526a 100644
--- a/flang/test/Lower/HLFIR/function-return-as-expr.f90
+++ b/flang/test/Lower/HLFIR/function-return-as-expr.f90
@@ -37,9 +37,10 @@ end subroutine test2
 ! CHECK:           %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
 ! CHECK:           %[[VAL_11:.*]] = fir.box_elesize %[[VAL_10]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> index
 ! CHECK:           %[[VAL_12:.*]] = fir.emboxchar %[[VAL_9]], %[[VAL_11]] : (!fir.heap<!fir.char<1,?>>, index) -> !fir.boxchar<1>
-! CHECK:           %[[VAL_13:.*]] = arith.constant false
+! CHECK:           %[[VAL_13:.*]] = arith.constant true
 ! CHECK:           %[[VAL_14:.*]] = hlfir.as_expr %[[VAL_12]] move %[[VAL_13]] : (!fir.boxchar<1>, i1) -> !hlfir.expr<!fir.char<1,?>>
 ! CHECK:           hlfir.assign %[[VAL_14]] to %{{.*}}#0 realloc : !hlfir.expr<!fir.char<1,?>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:           hlfir.destroy %[[VAL_14]]
 
 subroutine test3
   character(len=:), allocatable :: c
@@ -53,9 +54,10 @@ end subroutine test3
 ! CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %{{.*}} typeparams %{{.*}} {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.box<!fir.heap<!fir.char<1,3>>>>, index) -> (!fir.ref<!fir.box<!fir.heap<!fir.char<1,3>>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,3>>>>)
 ! CHECK:           %[[VAL_14:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.char<1,3>>>>
 ! CHECK:           %[[VAL_15:.*]] = fir.box_addr %[[VAL_14]] : (!fir.box<!fir.heap<!fir.char<1,3>>>) -> !fir.heap<!fir.char<1,3>>
-! CHECK:           %[[VAL_16:.*]] = arith.constant false
+! CHECK:           %[[VAL_16:.*]] = arith.constant true
 ! CHECK:           %[[VAL_17:.*]] = hlfir.as_expr %[[VAL_15]] move %[[VAL_16]] : (!fir.heap<!fir.char<1,3>>, i1) -> !hlfir.expr<!fir.char<1,3>>
 ! CHECK:           hlfir.assign %[[VAL_17]] to %{{.*}}#0 realloc : !hlfir.expr<!fir.char<1,3>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:           hlfir.destroy %[[VAL_17]]
 
 subroutine test4
   class(*), allocatable :: p
diff --git a/flang/test/Lower/HLFIR/where.f90 b/flang/test/Lower/HLFIR/where.f90
index dea344214928a..0081d7751239b 100644
--- a/flang/test/Lower/HLFIR/where.f90
+++ b/flang/test/Lower/HLFIR/where.f90
@@ -80,10 +80,10 @@ subroutine where_cleanup()
 ! CHECK:    %[[VAL_6:.*]] = fir.call @_QPreturn_temporary_mask() fastmath<contract> : () -> !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>
 ! CHECK:    fir.save_result %[[VAL_6]] to %[[VAL_7]]#0 : !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
 ! CHECK:             %[[deref:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
-! CHECK:             %[[MustFree:.*]] = arith.constant false
+! CHECK:             %[[MustFree:.*]] = arith.constant true
 ! CHECK:             %[[ResTemp:.*]] = hlfir.as_expr %[[deref]] move %[[MustFree]] : (!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>, i1) -> !hlfir.expr<?x!fir.logical<4>>
 ! CHECK:             hlfir.yield %[[ResTemp]] : !hlfir.expr<?x!fir.logical<4>> cleanup {
-! CHECK:        fir.freemem
+! CHECK:        hlfir.destroy %[[ResTemp]]
 ! CHECK:    }
 ! CHECK:  } do {
 ! CHECK:    hlfir.region_assign {
@@ -91,10 +91,10 @@ subroutine where_cleanup()
 ! CHECK:      %[[VAL_14:.*]] = fir.call @_QPreturn_temporary_array() fastmath<contract> : () -> !fir.box<!fir.heap<!fir.array<?xf32>>>
 ! CHECK:      fir.save_result %[[VAL_14]] to %[[VAL_15]]#0 : !fir.box<!fir.heap<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
 ! CHECK:               %[[deref:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-! CHECK:               %[[MustFree:.*]] = arith.constant false
+! CHECK:               %[[MustFree:.*]] = arith.constant true
 ! CHECK:               %[[ResTemp:.*]] = hlfir.as_expr %[[deref]] move %[[MustFree]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, i1) -> !hlfir.expr<?xf32>
 ! CHECK:               hlfir.yield %[[ResTemp]] : !hlfir.expr<?xf32> cleanup {
-! CHECK:          fir.freemem
+! CHECK:        hlfir.destroy %[[ResTemp]]
 ! CHECK:      }
 ! CHECK:    } to {
 ! CHECK:      hlfir.yield %[[VAL_5]]#0 : !fir.ref<!fir.array<10xf32>>
diff --git a/flang/test/Lower/io-statement-clean-ups.f90 b/flang/test/Lower/io-statement-clean-ups.f90
index 7e590e7236322..c8a48e8f7bcf0 100644
--- a/flang/test/Lower/io-statement-clean-ups.f90
+++ b/flang/test/Lower/io-statement-clean-ups.f90
@@ -13,34 +13,44 @@ function gen_temp_character()
   end interface
   integer :: status
   open (10, encoding=gen_temp_character(), file=gen_temp_character(), pad=gen_temp_character(), iostat=status)
-! CHECK:  %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,?>>>
-! CHECK:  %[[VAL_2:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,?>>>
-! CHECK:  %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,?>>>
-! CHECK:  fir.call @_FortranAioBeginOpenUnit
-! CHECK:  %[[DECLARE3:.*]] = fir.declare %[[VAL_3]]
-! CHECK:  %[[VAL_15:.*]] = fir.call @_QPgen_temp_character() {{.*}}: () -> !fir.box<!fir.heap<!fir.char<1,?>>>
-! CHECK:  fir.save_result %[[VAL_15]] to %[[DECLARE3]] : !fir.box<!fir.heap<!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
-! CHECK:  %[[VAL_21:.*]] = fir.call @_FortranAioSetEncoding
-! CHECK:  %[[VAL_22:.*]] = fir.load %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
-! CHECK:  %[[VAL_23:.*]] = fir.box_addr %[[VAL_22]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> !fir.heap<!fir.char<1,?>>
-! CHECK:  fir.freemem %[[VAL_23]] : !fir.heap<!fir.char<1,?>>
-! CHECK:  fir.if %[[VAL_21]] {
-! CHECK:    %[[DECLARE2:.*]] = fir.declare %[[VAL_2]]
-! CHECK:    %[[VAL_27:.*]] = fir.call @_QPgen_temp_character() {{.*}}: () -> !fir.box<!fir.heap<!fir.char<1,?>>>
-! CHECK:    fir.save_result %[[VAL_27]] to %[[DECLARE2]] : !fir.box<!fir.heap<!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
-! CHECK:    %[[VAL_33:.*]] = fir.call @_FortranAioSetFile
-! CHECK:    %[[VAL_34:.*]] = fir.load %[[VAL_2]] : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
-! CHECK:    %[[VAL_35:.*]] = fir.box_addr %[[VAL_34]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> !fir.heap<!fir.char<1,?>>
-! CHECK:    fir.freemem %[[VAL_35]] : !fir.heap<!fir.char<1,?>>
-! CHECK:    fir.if %[[VAL_33]] {
-! CHECK:      %[[DECLARE1:.*]] = fir.declare %[[VAL_1]]
-! CHECK:      %[[VAL_39:.*]] = fir.call @_QPgen_temp_character() {{.*}}: () -> !fir.box<!fir.heap<!fir.char<1,?>>>
-! CHECK:      fir.save_result %[[VAL_39]] to %[[DECLARE1]] : !fir.box<!fir.heap<!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
-! CHECK:      fir.call @_FortranAioSetPad
-! CHECK:      %[[VAL_46:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
-! CHECK:      %[[VAL_47:.*]] = fir.box_addr %[[VAL_46]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> !fir.heap<!fir.char<1,?>>
-! CHECK:      fir.freemem %[[VAL_47]] : !fir.heap<!fir.char<1,?>>
-! CHECK:    }
-! CHECK:  }
-! CHECK:  fir.call @_FortranAioEndIoStatement
+! CHECK:           %[[ALLOCA_0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,?>>> {bindc_name = ".result"}
+! CHECK:           %[[ALLOCA_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,?>>> {bindc_name = ".result"}
+! CHECK:           %[[ALLOCA_2:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,?>>> {bindc_name = ".result"}
+! CHECK:           fir.call @_FortranAioBeginOpenUnit
+! CHECK:           %[[DECLARE_1:.*]] = fir.declare %[[ALLOCA_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:           %[[CALL_1:.*]] = fir.call @_QPgen_temp_character() {{.*}}: () -> !fir.box<!fir.heap<!fir.char<1,?>>>
+! CHECK:           fir.save_result %[[CALL_1]] to %[[DECLARE_1]] : !fir.box<!fir.heap<!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:           %[[LOAD_0:.*]] = fir.load %[[DECLARE_1]] : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:           %[[BOX_ADDR_0:.*]] = fir.box_addr %[[LOAD_0]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> !fir.heap<!fir.char<1,?>>
+! CHECK:           %[[EMBOXCHAR_0:.*]] = fir.emboxchar %[[BOX_ADDR_0]], %{{.*}} : (!fir.heap<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+! CHECK:           %[[BOX_ADDR_1:.*]] = fir.box_addr %[[EMBOXCHAR_0]] : (!fir.boxchar<1>) -> !fir.ref<!fir.char<1,?>>
+! CHECK:           %[[CALL_2:.*]] = fir.call @_FortranAioSetEncoding
+! CHECK:           %[[CONVERT_3:.*]] = fir.convert %[[BOX_ADDR_1]] : (!fir.ref<!fir.char<1,?>>) -> !fir.heap<!fir.char<1,?>>
+! CHECK:           fir.freemem %[[CONVERT_3]] : !fir.heap<!fir.char<1,?>>
+! CHECK:           fir.if %[[CALL_2]] {
+! CHECK:             %[[DECLARE_2:.*]] = fir.declare %[[ALLOCA_1]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:             %[[CALL_3:.*]] = fir.call @_QPgen_temp_character() {{.*}}: () -> !fir.box<!fir.heap<!fir.char<1,?>>>
+! CHECK:             fir.save_result %[[CALL_3]] to %[[DECLARE_2]] : !fir.box<!fir.heap<!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:             %[[LOAD_1:.*]] = fir.load %[[DECLARE_2]] : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:             %[[BOX_ADDR_2:.*]] = fir.box_addr %[[LOAD_1]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> !fir.heap<!fir.char<1,?>>
+! CHECK:             %[[EMBOXCHAR_1:.*]] = fir.emboxchar %[[BOX_ADDR_2]], %{{.*}} : (!fir.heap<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+! CHECK:             %[[BOX_ADDR_3:.*]] = fir.box_addr %[[EMBOXCHAR_1]] : (!fir.boxchar<1>) -> !fir.ref<!fir.char<1,?>>
+! CHECK:             %[[CALL_4:.*]] = fir.call @_FortranAioSetFile
+! CHECK:             %[[CONVERT_6:.*]] = fir.convert %[[BOX_ADDR_3]] : (!fir.ref<!fir.char<1,?>>) -> !fir.heap<!fir.char<1,?>>
+! CHECK:             fir.freemem %[[CONVERT_6]] : !fir.heap<!fir.char<1,?>>
+! CHECK:             fir.if %[[CALL_4]] {
+! CHECK:               %[[DECLARE_3:.*]] = fir.declare %[[ALLOCA_0]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:               %[[CALL_5:.*]] = fir.call @_QPgen_temp_character() {{.*}}: () -> !fir.box<!fir.heap<!fir.char<1,?>>>
+! CHECK:               fir.save_result %[[CALL_5]] to %[[DECLARE_3]] : !fir.box<!fir.heap<!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:               %[[LOAD_2:.*]] = fir.load %[[DECLARE_3]] : !fir.ref<!fir.box<!fir.heap<!fir.char<1,?>>>>
+! CHECK:               %[[BOX_ADDR_4:.*]] = fir.box_addr %[[LOAD_2]] : (!fir.box<!fir.heap<!fir.char<1,?>>>) -> !fir.heap<!fir.char<1,?>>
+! CHECK:               %[[EMBOXCHAR_2:.*]] = fir.emboxchar %[[BOX_ADDR_4]], %{{.*}} : (!fir.heap<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+! CHECK:               %[[BOX_ADDR_5:.*]] = fir.box_addr %[[EMBOXCHAR_2]] : (!fir.boxchar<1>) -> !fir.ref<!fir.char<1,?>>
+! CHECK:               fir.call @_FortranAioSetPad
+! CHECK:               %[[CONVERT_9:.*]] = fir.convert %[[BOX_ADDR_5]] : (!fir.ref<!fir.char<1,?>>) -> !fir.heap<!fir.char<1,?>>
+! CHECK:               fir.freemem %[[CONVERT_9]] : !fir.heap<!fir.char<1,?>>
+! CHECK:             }
+! CHECK:           }
+! CHECK:           fir.call @_FortranAioEndIoStatement
+
 end subroutine

>From 17e78bfc95baac1c988d731251f9448059bf2e7a Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Fri, 30 Jan 2026 06:11:46 -0800
Subject: [PATCH 2/5] remove left over comment

---
 flang/lib/Lower/ConvertCall.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 569347a0e1e22..72ccec0ab535a 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -1989,7 +1989,6 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
     resultEntity = loadTrivialScalar(loc, builder, resultEntity);
     if (resultEntity.isVariable()) {
       // If the result has no finalization, it can be moved into an expression.
-      // In such case, the expression.
       mlir::Value asExpr = hlfir::AsExprOp::create(
           builder, loc, resultEntity, builder.createBool(loc, mustFree));
       callContext.stmtCtx.attachCleanup([bldr = &builder, loc, asExpr]() {

>From ec5b6722ec6cfa36254af7035f4f2da3bff1b4ce Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Mon, 2 Feb 2026 06:55:55 -0800
Subject: [PATCH 3/5] ensure mustFinalizeResult and resultIsFinalized are in
 sync

---
 flang/lib/Lower/ConvertCall.cpp               | 60 ++++++++-----------
 .../Lower/HLFIR/function-return-as-expr.f90   |  6 +-
 .../Lower/HLFIR/function-return-destroy.f90   |  6 +-
 3 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 72ccec0ab535a..d4119f9e8c9d6 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -342,6 +342,22 @@ getTypeWithIgnoreTkrC(mlir::FunctionType funcType,
   return std::nullopt;
 }
 
+static bool mustDestroyOrFinalizeFunctionResult(
+    mlir::FunctionType callSiteType,
+    std::optional<Fortran::evaluate::DynamicType> retTy) {
+  if (callSiteType.getNumResults() == 0 || !retTy.has_value())
+    return false;
+  if (fir::isPointerType(callSiteType.getResult(0)))
+    return false;
+  if (retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic())
+    return true;
+  if (retTy->category() != Fortran::common::TypeCategory::Derived)
+    return false;
+  return Fortran::semantics::MayRequireFinalization(
+             retTy->GetDerivedTypeSpec()) ||
+         hlfir::mayHaveAllocatableComponent(callSiteType.getResult(0));
+}
+
 std::tuple<Fortran::lower::LoweredResult, bool, mlir::Operation *>
 Fortran::lower::genCallOpAndResult(
     mlir::Location loc, Fortran::lower::AbstractConverter &converter,
@@ -803,10 +819,7 @@ Fortran::lower::genCallOpAndResult(
   // the resulting array result will be finalized/destroyed
   // as needed by hlfir.destroy.
   const bool mustFinalizeResult =
-      !isElemental && callSiteType.getNumResults() > 0 &&
-      !fir::isPointerType(callSiteType.getResult(0)) && retTy.has_value() &&
-      (retTy->category() == Fortran::common::TypeCategory::Derived ||
-       retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic());
+      !isElemental && mustDestroyOrFinalizeFunctionResult(callSiteType, retTy);
 
   if (caller.mustSaveResult()) {
     assert(allocatedResult.has_value());
@@ -859,38 +872,17 @@ Fortran::lower::genCallOpAndResult(
         [](const auto &) {});
 
     // 7.5.6.3 point 5. Derived-type finalization for nonpointer function.
-    bool resultIsFinalized = false;
-    // Check if the derived-type is finalizable if it is a monomorphic
-    // derived-type.
-    // For polymorphic and unlimited polymorphic entities call the runtime
-    // in any cases.
+    // Note that this is also done for derived type with no final routines
+    // that have allocatable components to ensure the allocatable
+    // components are deallocated.
     if (mustFinalizeResult) {
-      if (retTy->IsPolymorphic() || retTy->IsUnlimitedPolymorphic()) {
-        auto *bldr = &converter.getFirOpBuilder();
-        stmtCtx.attachCleanup([bldr, loc, allocatedResult]() {
-          fir::runtime::genDerivedTypeDestroy(*bldr, loc,
-                                              fir::getBase(*allocatedResult));
-        });
-        resultIsFinalized = true;
-      } else {
-        const Fortran::semantics::DerivedTypeSpec &typeSpec =
-            retTy->GetDerivedTypeSpec();
-        // If the result type may require finalization
-        // or have allocatable components, we need to make sure
-        // everything is properly finalized/deallocated.
-        if (Fortran::semantics::MayRequireFinalization(typeSpec) ||
-            // We can use DerivedTypeDestroy even if finalization is not needed.
-            hlfir::mayHaveAllocatableComponent(funcType.getResults()[0])) {
-          auto *bldr = &converter.getFirOpBuilder();
-          stmtCtx.attachCleanup([bldr, loc, allocatedResult]() {
-            mlir::Value box = bldr->createBox(loc, *allocatedResult);
-            fir::runtime::genDerivedTypeDestroy(*bldr, loc, box);
-          });
-          resultIsFinalized = true;
-        }
-      }
+      auto *bldr = &converter.getFirOpBuilder();
+      stmtCtx.attachCleanup([bldr, loc, allocatedResult]() {
+        mlir::Value box = bldr->createBox(loc, *allocatedResult);
+        fir::runtime::genDerivedTypeDestroy(*bldr, loc, box);
+      });
     }
-    return {LoweredResult{*allocatedResult}, resultIsFinalized, callOp};
+    return {LoweredResult{*allocatedResult}, mustFinalizeResult, callOp};
   }
 
   if (allocatedResult)
diff --git a/flang/test/Lower/HLFIR/function-return-as-expr.f90 b/flang/test/Lower/HLFIR/function-return-as-expr.f90
index 332b0aea9526a..3fafc47833d6c 100644
--- a/flang/test/Lower/HLFIR/function-return-as-expr.f90
+++ b/flang/test/Lower/HLFIR/function-return-as-expr.f90
@@ -71,7 +71,8 @@ end subroutine test4
 ! CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_0:.*]] : !fir.ref<!fir.class<!fir.heap<none>>>
 ! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = ".tmp.func_result"} : (!fir.class<!fir.heap<none>>) -> (!fir.class<none>, !fir.class<none>)
 ! CHECK:           hlfir.assign %[[VAL_7]]#0 to %{{.*}}#0 realloc : !fir.class<none>, !fir.ref<!fir.class<!fir.heap<none>>>
-! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.box<none>
+! CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.class<!fir.heap<none>>>
+! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (!fir.class<!fir.heap<none>>) -> !fir.box<none>
 ! CHECK:           fir.call @_FortranADestroy(%[[VAL_10]]) fastmath<contract> : (!fir.box<none>) -> ()
 
 subroutine test4b
@@ -86,7 +87,8 @@ end subroutine test4b
 ! CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_0:.*]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
 ! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = ".tmp.func_result"} : (!fir.class<!fir.heap<!fir.array<?x?xnone>>>) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
 ! CHECK:           hlfir.assign %[[VAL_7]]#0 to %{{.*}}#0 realloc : !fir.class<!fir.array<?x?xnone>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
-! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> !fir.box<none>
+! CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
+! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (!fir.class<!fir.heap<!fir.array<?x?xnone>>>) -> !fir.box<none>
 ! CHECK:           fir.call @_FortranADestroy(%[[VAL_10]]) fastmath<contract> : (!fir.box<none>) -> ()
 
 subroutine test5
diff --git a/flang/test/Lower/HLFIR/function-return-destroy.f90 b/flang/test/Lower/HLFIR/function-return-destroy.f90
index 5bd014981c128..ca65a6e1ec39c 100644
--- a/flang/test/Lower/HLFIR/function-return-destroy.f90
+++ b/flang/test/Lower/HLFIR/function-return-destroy.f90
@@ -59,9 +59,11 @@ end function ret_type_t1a
 end subroutine test1a
 ! CHECK-LABEL:   func.func @_QPtest1a() {
 ! CHECK-NOT: fir.call{{.*}}Destroy
-! CHECK:           fir.if %{{.*}} {
-! CHECK-NEXT:        fir.freemem %{{.*}} : !fir.heap<!fir.type<_QMtypesTt1{x:f32}>>
+! CHECK-NOT: fir.freemem
+! CHECK:     hlfir.as_expr %{{.*}} move %true
 ! CHECK-NOT: fir.call{{.*}}Destroy
+! CHECK-NOT: fir.freemem
+! CHECK:     hlfir.destroy
 ! CHECK:           fir.if %{{.*}} {
 ! CHECK:             fir.call @_FortranAAllocatableDeallocate({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 ! CHECK-NOT: fir.call{{.*}}Destroy

>From 90789ab66f6c7d2cbdb42141ad477863e652555b Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Tue, 3 Feb 2026 07:03:14 -0800
Subject: [PATCH 4/5] do not insert destroy for elemental

---
 flang/lib/Lower/ConvertCall.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index d4119f9e8c9d6..2a4bb54bc4324 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -1930,14 +1930,14 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
   prepareUserCallArguments(loweredActuals, caller, callSiteType, callContext,
                            callCleanUps);
 
+  bool isElemental = callContext.isElementalProcWithArrayArgs();
   // Prepare lowered arguments according to the interface
   // and map the lowered values to the dummy
   // arguments.
   auto [loweredResult, resultIsFinalized, callOp] =
       Fortran::lower::genCallOpAndResult(
           loc, callContext.converter, callContext.symMap, callContext.stmtCtx,
-          caller, callSiteType, callContext.resultType,
-          callContext.isElementalProcWithArrayArgs());
+          caller, callSiteType, callContext.resultType, isElemental);
 
   // Clean-up associations and copy-in.
   // The association clean-ups are postponed to the end of the statement
@@ -1983,9 +1983,13 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
       // If the result has no finalization, it can be moved into an expression.
       mlir::Value asExpr = hlfir::AsExprOp::create(
           builder, loc, resultEntity, builder.createBool(loc, mustFree));
-      callContext.stmtCtx.attachCleanup([bldr = &builder, loc, asExpr]() {
-        hlfir::DestroyOp::create(*bldr, loc, asExpr, /*finalize=*/false);
-      });
+      if (!isElemental) {
+        // Insert clean-up for the expression, except for elemental call where
+        // the cleaned-up is inserted at the array level.
+        callContext.stmtCtx.attachCleanup([bldr = &builder, loc, asExpr]() {
+          hlfir::DestroyOp::create(*bldr, loc, asExpr, /*finalize=*/false);
+        });
+      }
       return hlfir::EntityWithAttributes{asExpr};
     }
     if (allocatable)

>From 0e33098be6630a8329068d8eaad3d783598217c7 Mon Sep 17 00:00:00 2001
From: jeanPerier <jean.perier.polytechnique at gmail.com>
Date: Wed, 4 Feb 2026 12:28:30 +0100
Subject: [PATCH 5/5] Apply suggestions from code review

---
 flang/lib/Lower/ConvertCall.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 2a4bb54bc4324..d72f74b440c53 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -1930,7 +1930,7 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
   prepareUserCallArguments(loweredActuals, caller, callSiteType, callContext,
                            callCleanUps);
 
-  bool isElemental = callContext.isElementalProcWithArrayArgs();
+  const bool isElemental = callContext.isElementalProcWithArrayArgs();
   // Prepare lowered arguments according to the interface
   // and map the lowered values to the dummy
   // arguments.
@@ -1977,7 +1977,7 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
         loc, builder, result, tempResultName, /*insertBefore=*/callOp);
     // Allocatable result must be freed, other results are stack allocated.
     const auto *allocatable = result.getBoxOf<fir::MutableBoxValue>();
-    bool mustFree = allocatable != nullptr;
+    const bool mustFree = allocatable != nullptr;
     resultEntity = loadTrivialScalar(loc, builder, resultEntity);
     if (resultEntity.isVariable()) {
       // If the result has no finalization, it can be moved into an expression.



More information about the flang-commits mailing list