[flang-commits] [flang] 32b423e - [flang][hlfir] Extend InlineHLFIRCopy to inline copy_out with copy-back (#202290)

via flang-commits flang-commits at lists.llvm.org
Mon Jun 8 22:47:24 PDT 2026


Author: Kareem Ergawy
Date: 2026-06-09T07:47:19+02:00
New Revision: 32b423ec57d76315cd42b99c0cdd9c63623d8b7b

URL: https://github.com/llvm/llvm-project/commit/32b423ec57d76315cd42b99c0cdd9c63623d8b7b
DIFF: https://github.com/llvm/llvm-project/commit/32b423ec57d76315cd42b99c0cdd9c63623d8b7b.diff

LOG: [flang][hlfir] Extend InlineHLFIRCopy to inline copy_out with copy-back (#202290)

Rename `InlineHLFIRCopyIn` to `InlineHLFIRCopy` and extend it to inline
the paired `hlfir.copy_out` operation. The copy_out is inlined at its
original location, after the call, so that the temporary is deallocated
only once the callee has returned.

The copy_in/copy_out pair is inlined only when no copy-back is required
(intent(in)); pairs for intent(inout/out) arguments are left untransformed.

Based on https://github.com/llvm/llvm-project/pull/179096.

Co-Authored-By: Kazuaki Matsumura <kmatsumura at nvidia.com> (Original
author of the changes).
Co-Authored-By: Claude Sonnet 4.6 <noreply at anthropic.com>

Co-authored-by: Kazuaki Matsumura <kmatsumura at nvidia.com>
Co-authored-by: Claude Sonnet 4.6 <noreply at anthropic.com>

Added: 
    flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopy.cpp
    flang/test/HLFIR/inline-hlfir-copy.fir

Modified: 
    flang/include/flang/Optimizer/HLFIR/Passes.td
    flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
    flang/lib/Optimizer/Passes/Pipelines.cpp

Removed: 
    flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
    flang/test/HLFIR/inline-hlfir-copy-in.fir


################################################################################
diff  --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index 4973715c1055c..2d57a50acb304 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -104,8 +104,8 @@ def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
       "device code without inlining all array assignments.">];
 }
 
-def InlineHLFIRCopyIn : Pass<"inline-hlfir-copy-in"> {
-  let summary = "Inline hlfir.copy_in operations";
+def InlineHLFIRCopy : Pass<"inline-hlfir-copy"> {
+  let summary = "Inline hlfir.copy_in and hlfir.copy_out operations";
 }
 
 def PropagateFortranVariableAttributes : Pass<"propagate-fortran-attrs"> {

diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index c0c64c19e3826..5e9d57407ad09 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -7,7 +7,7 @@ add_flang_library(HLFIRTransforms
   InlineElementals.cpp
   InlineHLFIRAssign.cpp
   SeparateAllocatableAssign.cpp
-  InlineHLFIRCopyIn.cpp
+  InlineHLFIRCopy.cpp
   LowerHLFIRIntrinsics.cpp
   LowerHLFIROrderedAssignments.cpp
   ScheduleOrderedAssignments.cpp

diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopy.cpp
similarity index 70%
rename from flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
rename to flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopy.cpp
index b4e89b0966e9c..f44db65a7a847 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopy.cpp
@@ -1,14 +1,19 @@
-//===- InlineHLFIRCopyIn.cpp - Inline hlfir.copy_in ops -------------------===//
+//===- InlineHLFIRCopy.cpp - Inline hlfir.copy_in/copy_out ops ------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// Transform hlfir.copy_in array operations into loop nests performing element
-// per element assignments. For simplicity, the inlining is done for trivial
-// data types when the copy_in does not require a corresponding copy_out and
-// when the input array is not behind a pointer. This may change in the future.
+// Transform hlfir.copy_in and hlfir.copy_out array operations into loop nests
+// performing element per element assignments. For simplicity, the inlining is
+// done for trivial data types when the input array is not behind a pointer.
+// This may change in the future.
+//
+// When the copy_in is inlined, the corresponding copy_out is also inlined.
+// Currently only intent(in) (deallocation-only) copy_out ops are inlined;
+// the copy_in/copy_out pair is left as-is when copy-back is required
+// (intent(inout/out)). Copy-back inlining may be added in the future.
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Builder/FIRBuilder.h"
@@ -21,15 +26,15 @@
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
 namespace hlfir {
-#define GEN_PASS_DEF_INLINEHLFIRCOPYIN
+#define GEN_PASS_DEF_INLINEHLFIRCOPY
 #include "flang/Optimizer/HLFIR/Passes.h.inc"
 } // namespace hlfir
 
-#define DEBUG_TYPE "inline-hlfir-copy-in"
+#define DEBUG_TYPE "inline-hlfir-copy"
 
-static llvm::cl::opt<bool> noInlineHLFIRCopyIn(
-    "no-inline-hlfir-copy-in",
-    llvm::cl::desc("Do not inline hlfir.copy_in operations"),
+static llvm::cl::opt<bool> noInlineHLFIRCopy(
+    "no-inline-hlfir-copy",
+    llvm::cl::desc("Do not inline hlfir.copy_in/copy_out operations"),
     llvm::cl::init(false));
 
 namespace {
@@ -42,6 +47,26 @@ class InlineCopyInConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
                   mlir::PatternRewriter &rewriter) const override;
 };
 
+// Inline a copy_out operation (deallocation only — no copy-back).
+// Generates: if (wasCopied) { freemem(temp) }
+static void inlineCopyOut(fir::FirOpBuilder &builder, mlir::Location loc,
+                          mlir::Value tempBox, mlir::Value wasCopied,
+                          mlir::Type sequenceType) {
+  builder.genIfOp(loc, {}, wasCopied, /*withElseRegion=*/false).genThen([&]() {
+    mlir::Value box = fir::LoadOp::create(builder, loc, tempBox);
+    mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box);
+    auto heapType = fir::HeapType::get(sequenceType);
+    mlir::Value heapAddr = fir::ConvertOp::create(builder, loc, heapType, addr);
+    fir::FreeMemOp::create(builder, loc, heapAddr);
+  });
+}
+
+// Note: We don't have a separate InlineCopyOutConversion pattern.
+// Copy_out inlining is handled by InlineCopyInConversion when it inlines
+// the paired copy_in. For copy_outs that aren't paired with an eligible
+// copy_in (e.g., optional args, assumed-rank, non-trivial types), the
+// copy_out is left as-is and will be lowered to a runtime call.
+
 llvm::LogicalResult
 InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
                                         mlir::PatternRewriter &rewriter) const {
@@ -81,6 +106,12 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
   mlir::Type sequenceType =
       hlfir::getFortranElementOrSequenceType(inputVariable.getType());
   fir::BoxType resultBoxType = fir::BoxType::get(sequenceType);
+
+  // Compute shape for use in the copy-in loop and temporary declaration.
+  mlir::Value shape = hlfir::genShape(loc, builder, inputVariable);
+  llvm::SmallVector<mlir::Value> extents =
+      hlfir::getIndexExtents(loc, builder, shape);
+
   mlir::Value isContiguous =
       fir::IsContiguousBoxOp::create(builder, loc, inputVariable);
   mlir::Operation::result_range results =
@@ -99,9 +130,6 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
                 mlir::ValueRange{result, builder.createBool(loc, false)});
           })
           .genElse([&] {
-            mlir::Value shape = hlfir::genShape(loc, builder, inputVariable);
-            llvm::SmallVector<mlir::Value> extents =
-                hlfir::getIndexExtents(loc, builder, shape);
             llvm::StringRef tmpName{".tmp.copy_in"};
             llvm::SmallVector<mlir::Value> lenParams;
             mlir::Value alloc = builder.createHeapTemporary(
@@ -148,20 +176,25 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
   mlir::OpResult resultBox = results[0];
   mlir::OpResult needsCleanup = results[1];
 
-  // Prepare the corresponding copyOut to free the temporary if it is required
+  // Inline the corresponding copyOut (deallocation only).
+  // Store the resultBox first since it's a box value.
   auto alloca = fir::AllocaOp::create(builder, loc, resultBox.getType());
-  auto store = fir::StoreOp::create(builder, loc, resultBox, alloca);
-  rewriter.startOpModification(copyOut);
-  copyOut->setOperand(0, store.getMemref());
-  copyOut->setOperand(1, needsCleanup);
-  rewriter.finalizeOpModification(copyOut);
+  fir::StoreOp::create(builder, loc, resultBox, alloca);
+
+  rewriter.setInsertionPoint(copyOut);
+  fir::FirOpBuilder copyOutBuilder(rewriter, copyOut.getOperation());
+  inlineCopyOut(copyOutBuilder, copyOut.getLoc(), alloca, needsCleanup,
+                sequenceType);
+
+  // Erase the copyOut since we've inlined it
+  rewriter.eraseOp(copyOut);
 
   rewriter.replaceOp(copyIn, {resultBox, builder.genNot(loc, isContiguous)});
   return mlir::success();
 }
 
-class InlineHLFIRCopyInPass
-    : public hlfir::impl::InlineHLFIRCopyInBase<InlineHLFIRCopyInPass> {
+class InlineHLFIRCopyPass
+    : public hlfir::impl::InlineHLFIRCopyBase<InlineHLFIRCopyPass> {
 public:
   void runOnOperation() override {
     mlir::MLIRContext *context = &getContext();
@@ -172,14 +205,14 @@ class InlineHLFIRCopyInPass
         mlir::GreedySimplifyRegionLevel::Disabled);
 
     mlir::RewritePatternSet patterns(context);
-    if (!noInlineHLFIRCopyIn) {
+    if (!noInlineHLFIRCopy) {
       patterns.insert<InlineCopyInConversion>(context);
     }
 
     if (mlir::failed(mlir::applyPatternsGreedily(
             getOperation(), std::move(patterns), config))) {
       mlir::emitError(getOperation()->getLoc(),
-                      "failure in hlfir.copy_in inlining");
+                      "failure in hlfir.copy_in/copy_out inlining");
       signalPassFailure();
     }
   }

diff  --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 8e8521391885e..c6d531ce50762 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -311,7 +311,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
 
     if (optLevel == llvm::OptimizationLevel::O3) {
       addNestedPassToAllTopLevelOperations<PassConstructor>(
-          pm, hlfir::createInlineHLFIRCopyIn);
+          pm, hlfir::createInlineHLFIRCopy);
     }
   } else if (config.EnableOpenMPIsTargetDevice) {
     // At O0, only inline scalar-to-array broadcasts when compiling for an

diff  --git a/flang/test/HLFIR/inline-hlfir-copy-in.fir b/flang/test/HLFIR/inline-hlfir-copy.fir
similarity index 78%
rename from flang/test/HLFIR/inline-hlfir-copy-in.fir
rename to flang/test/HLFIR/inline-hlfir-copy.fir
index f1da1da9f9a5c..d8a96ca2c0b04 100644
--- a/flang/test/HLFIR/inline-hlfir-copy-in.fir
+++ b/flang/test/HLFIR/inline-hlfir-copy.fir
@@ -1,5 +1,5 @@
-// Test inlining of hlfir.copy_in
-// RUN: fir-opt --inline-hlfir-copy-in %s | FileCheck %s
+// Test inlining of hlfir.copy_in and hlfir.copy_out
+// RUN: fir-opt --inline-hlfir-copy %s | FileCheck %s
 
 // Test inlining of hlfir.copy_in that does not require the array to be copied out
 func.func private @_test_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
@@ -72,16 +72,23 @@ func.func private @_test_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {
 // CHECK:      }
 // CHECK:      fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box<!fir.array<?xf64>>, i1
 // CHECK:    }
+// CHECK:    %[[VAL_ALLOCA:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
+// CHECK:    fir.store %[[VAL_21:.*]]#0 to %[[VAL_ALLOCA:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
 // CHECK:    %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
 // CHECK:    %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
 // CHECK:    fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
-// CHECK:    hlfir.copy_out %{{.*}}, %[[VAL_21:.*]]#1 : (!fir.ref<!fir.box<!fir.array<?xf64>>>, i1) -> ()
+// CHECK:    fir.if %[[VAL_21:.*]]#1 {
+// CHECK:      %[[VAL_BOX:.*]] = fir.load %[[VAL_ALLOCA:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+// CHECK:      %[[VAL_ADDR:.*]] = fir.box_addr %[[VAL_BOX:.*]] : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK:      %[[VAL_HEAP:.*]] = fir.convert %[[VAL_ADDR:.*]] : (!fir.ref<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
+// CHECK:      fir.freemem %[[VAL_HEAP:.*]] : !fir.heap<!fir.array<?xf64>>
+// CHECK:    }
 // CHECK:    hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref<i32>, i1
 // CHECK:    return
 // CHECK:  }
 
-// Test not inlining of hlfir.copy_in that requires the array to be copied out
-func.func private @_test_no_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
+// Test not inlining of hlfir.copy_in when copy-back is required (intent(inout))
+func.func private @_test_inline_copy_in_with_copyback(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
   %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
   %1 = fir.dummy_scope : !fir.dscope
   %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -113,33 +120,14 @@ func.func private @_test_no_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>
   return
 }
 
-// CHECK-LABEL:  func.func private @_test_no_inline_copy_in(
-// CHECK-SAME:                                             %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
-// CHECK-SAME:                                             %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
-// CHECK-SAME:                                             %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
-// CHECK:    %[[VAL_3:.*]] = arith.constant 100 : i32
-// CHECK:    %[[VAL_4:.*]] = arith.constant 0 : index
-// CHECK:    %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK:    %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
-// CHECK:    %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK:    %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK:    %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK:    %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
-// CHECK:    %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref<i32>
-// CHECK:    %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
-// CHECK:    %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
-// CHECK:    %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
-// CHECK:    %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
-// CHECK:    %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
-// CHECK:    %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
-// CHECK:    %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
-// CHECK:    %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]])  shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
-// CHECK:    %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
-// CHECK:    %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
-// CHECK:    %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
-// CHECK:    fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
-// CHECK:    hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
-// CHECK:    hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref<i32>, i1
+// CHECK-LABEL:  func.func private @_test_inline_copy_in_with_copyback(
+// CHECK-SAME:      %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
+// CHECK-SAME:      %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
+// CHECK-SAME:      %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
+// CHECK:    %[[TMP:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+// CHECK:    %[[COPYIN:.*]]:2 = hlfir.copy_in %{{.*}} to %[[TMP]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+// CHECK:    fir.call @_QFPsb(
+// CHECK:    hlfir.copy_out %[[TMP]], %[[COPYIN]]#1 to %{{.*}} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
 // CHECK:    return
 // CHECK:  }
 


        


More information about the flang-commits mailing list