[flang-commits] [flang] [flang] Inline hlfir.copy_in for trivial types (PR #138718)
Kajetan Puchalski via flang-commits
flang-commits at lists.llvm.org
Thu May 22 07:26:55 PDT 2025
https://github.com/mrkajetanp updated https://github.com/llvm/llvm-project/pull/138718
>From 6bbba43416edf2c4d310d3e92e99ad458e87d8f7 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Thu, 10 Apr 2025 14:04:52 +0000
Subject: [PATCH 1/4] [flang] Inline hlfir.copy_in for trivial types
hlfir.copy_in implements copying non-contiguous array slices for functions
that take in arrays required to be contiguous through a flang-rt function
that calls memcpy/memmove separately on each element.
For large arrays of trivial types, this can incur considerable overhead
compared to a plain copy loop that is better able to take advantage of
hardware pipelines.
To address that, extend the InlineHLFIRAssign optimisation pass with a
new pattern for inlining hlfir.copy_in operations for trivial types.
For the time being, the pattern is only applied in cases where the
copy-in does not require a corresponding copy-out, such as when the
function being called declares the array parameter as intent(in).
Applying this optimisation reduces the runtime of thornado-mini's
DeleptonizationProblem by a factor of about 1/3rd.
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
.../HLFIR/Transforms/InlineHLFIRAssign.cpp | 117 ++++++++++++++++++
1 file changed, 117 insertions(+)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index 6e209cce07ad4..38c684eaceb7d 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -13,6 +13,7 @@
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
@@ -127,6 +128,121 @@ class InlineHLFIRAssignConversion
}
};
+class InlineCopyInConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
+public:
+ using mlir::OpRewritePattern<hlfir::CopyInOp>::OpRewritePattern;
+
+ llvm::LogicalResult
+ matchAndRewrite(hlfir::CopyInOp copyIn,
+ mlir::PatternRewriter &rewriter) const override;
+};
+
+llvm::LogicalResult
+InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
+ mlir::PatternRewriter &rewriter) const {
+ fir::FirOpBuilder builder(rewriter, copyIn.getOperation());
+ mlir::Location loc = copyIn.getLoc();
+ hlfir::Entity inputVariable{copyIn.getVar()};
+ if (!fir::isa_trivial(inputVariable.getFortranElementType()))
+ return rewriter.notifyMatchFailure(copyIn,
+ "CopyInOp's data type is not trivial");
+
+ if (fir::isPointerType(inputVariable.getType()))
+ return rewriter.notifyMatchFailure(
+ copyIn, "CopyInOp's input variable is a pointer");
+
+ // There should be exactly one user of WasCopied - the corresponding
+ // CopyOutOp.
+ if (copyIn.getWasCopied().getUses().empty())
+ return rewriter.notifyMatchFailure(copyIn,
+ "CopyInOp's WasCopied has no uses");
+ // The copy out should always be present, either to actually copy or just
+ // deallocate memory.
+ auto *copyOut =
+ copyIn.getWasCopied().getUsers().begin().getCurrent().getUser();
+
+ if (!mlir::isa<hlfir::CopyOutOp>(copyOut))
+ return rewriter.notifyMatchFailure(copyIn,
+ "CopyInOp has no direct CopyOut");
+
+ // Only inline the copy_in when copy_out does not need to be done, i.e. in
+ // case of intent(in).
+ if (::llvm::cast<hlfir::CopyOutOp>(copyOut).getVar())
+ return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out");
+
+ inputVariable =
+ hlfir::derefPointersAndAllocatables(loc, builder, inputVariable);
+ mlir::Type resultAddrType = copyIn.getCopiedIn().getType();
+ mlir::Value isContiguous =
+ builder.create<fir::IsContiguousBoxOp>(loc, inputVariable);
+ auto results =
+ builder
+ .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ mlir::Value falseVal = builder.create<mlir::arith::ConstantOp>(
+ loc, builder.getI1Type(), builder.getBoolAttr(false));
+ builder.create<fir::ResultOp>(
+ loc, mlir::ValueRange{inputVariable, falseVal});
+ })
+ .genElse([&] {
+ auto [temp, cleanup] =
+ hlfir::createTempFromMold(loc, builder, inputVariable);
+ mlir::Value shape = hlfir::genShape(loc, builder, inputVariable);
+ llvm::SmallVector<mlir::Value> extents =
+ hlfir::getIndexExtents(loc, builder, shape);
+ hlfir::LoopNest loopNest = hlfir::genLoopNest(
+ loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(copyIn));
+ builder.setInsertionPointToStart(loopNest.body);
+ auto elem = hlfir::getElementAt(loc, builder, inputVariable,
+ loopNest.oneBasedIndices);
+ elem = hlfir::loadTrivialScalar(loc, builder, elem);
+ auto tempElem = hlfir::getElementAt(loc, builder, temp,
+ loopNest.oneBasedIndices);
+ builder.create<hlfir::AssignOp>(loc, elem, tempElem);
+ builder.setInsertionPointAfter(loopNest.outerOp);
+
+ mlir::Value result;
+ // Make sure the result is always a boxed array by boxing it
+ // ourselves if need be.
+ if (mlir::isa<fir::BaseBoxType>(temp.getType())) {
+ result = temp;
+ } else {
+ auto refTy =
+ fir::ReferenceType::get(temp.getElementOrSequenceType());
+ auto refVal = builder.createConvert(loc, refTy, temp);
+ result =
+ builder.create<fir::EmboxOp>(loc, resultAddrType, refVal);
+ }
+
+ builder.create<fir::ResultOp>(loc,
+ mlir::ValueRange{result, cleanup});
+ })
+ .getResults();
+
+ auto addr = results[0];
+ auto needsCleanup = results[1];
+
+ builder.setInsertionPoint(copyOut);
+ builder.genIfOp(loc, {}, needsCleanup, false).genThen([&] {
+ auto boxAddr = builder.create<fir::BoxAddrOp>(loc, addr);
+ auto heapType = fir::HeapType::get(fir::BoxValue(addr).getBaseTy());
+ auto heapVal = builder.createConvert(loc, heapType, boxAddr.getResult());
+ builder.create<fir::FreeMemOp>(loc, heapVal);
+ });
+ rewriter.eraseOp(copyOut);
+
+ auto tempBox = copyIn.getTempBox();
+
+ rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)});
+
+ // The TempBox is only needed for flang-rt calls which we're no longer
+ // generating.
+ rewriter.eraseOp(tempBox.getDefiningOp());
+ return mlir::success();
+}
+
class InlineHLFIRAssignPass
: public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
public:
@@ -140,6 +256,7 @@ class InlineHLFIRAssignPass
mlir::RewritePatternSet patterns(context);
patterns.insert<InlineHLFIRAssignConversion>(context);
+ patterns.insert<InlineCopyInConversion>(context);
if (mlir::failed(mlir::applyPatternsGreedily(
getOperation(), std::move(patterns), config))) {
>From 32a62862f141c3894c7a30b141ddf3b7701cb85b Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Wed, 7 May 2025 16:04:07 +0000
Subject: [PATCH 2/4] Add tests
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
flang/test/HLFIR/inline-hlfir-assign.fir | 144 +++++++++++++++++++++++
1 file changed, 144 insertions(+)
diff --git a/flang/test/HLFIR/inline-hlfir-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir
index f834e7971e3d5..df7681b9c5c16 100644
--- a/flang/test/HLFIR/inline-hlfir-assign.fir
+++ b/flang/test/HLFIR/inline-hlfir-assign.fir
@@ -353,3 +353,147 @@ func.func @_QPtest_expr_rhs(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.
// CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr<?x!fir.logical<4>>
// CHECK: return
// CHECK: }
+
+// Test inlining of hlfir.copy_in that does not require the array to be copied out
+func.func private @_test_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+ %1 = fir.dummy_scope : !fir.dscope
+ %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+ %5 = fir.load %2#0 : !fir.ref<i32>
+ %6 = fir.convert %5 : (i32) -> i64
+ %c1 = arith.constant 1 : index
+ %c1_0 = arith.constant 1 : index
+ %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+ %c1_1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %8 = arith.subi %7#1, %c1 : index
+ %9 = arith.addi %8, %c1_1 : index
+ %10 = arith.divsi %9, %c1_1 : index
+ %11 = arith.cmpi sgt, %10, %c0 : index
+ %12 = arith.select %11, %10, %c0 : index
+ %13 = fir.load %3#0 : !fir.ref<i32>
+ %14 = fir.convert %13 : (i32) -> i64
+ %15 = fir.shape %12 : (index) -> !fir.shape<1>
+ %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+ %c100_i32 = arith.constant 100 : i32
+ %17:2 = hlfir.copy_in %16 to %0 : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+ %18 = fir.box_addr %17#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+ fir.call @_QFPsb(%18, %19#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+ hlfir.copy_out %0, %17#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
+ hlfir.end_associate %19#1, %19#2 : !fir.ref<i32>, i1
+ return
+}
+
+// CHECK-LABEL: func.func private @_test_inline_copy_in(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
+// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
+// CHECK: %[[VAL_3:.*]] = arith.constant true
+// CHECK: %[[VAL_4:.*]] = arith.constant false
+// CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
+// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_22:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
+// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_7:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index
+// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
+// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_7:.*]]:%[[VAL_13:.*]]#1:%[[VAL_7:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+// CHECK: %[[VAL_20:.*]] = fir.is_contiguous_box %[[VAL_19:.*]] whole : (!fir.box<!fir.array<?xf64>>) -> i1
+// CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box<!fir.array<?xf64>>, i1) {
+// CHECK: fir.result %[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box<!fir.array<?xf64>>, i1
+// CHECK: } else {
+// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array<?xf64>, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""}
+// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.heap<!fir.array<?xf64>>)
+// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered {
+// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26:.*]] : !fir.ref<f64>
+// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK: hlfir.assign %[[VAL_27:.*]] to %[[VAL_28:.*]] : f64, !fir.ref<f64>
+// CHECK: }
+// CHECK: fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box<!fir.array<?xf64>>, i1
+// CHECK: }
+// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+// CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+// CHECK: fir.if %[[VAL_21:.*]]#1 {
+// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
+// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap<!fir.array<?xf64>>
+// CHECK: }
+// CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref<i32>, i1
+// CHECK: return
+// CHECK: }
+
+// Test not inlining of hlfir.copy_in that requires the array to be copied out
+func.func private @_test_no_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+ %1 = fir.dummy_scope : !fir.dscope
+ %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+ %5 = fir.load %2#0 : !fir.ref<i32>
+ %6 = fir.convert %5 : (i32) -> i64
+ %c1 = arith.constant 1 : index
+ %c1_0 = arith.constant 1 : index
+ %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+ %c1_1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %8 = arith.subi %7#1, %c1 : index
+ %9 = arith.addi %8, %c1_1 : index
+ %10 = arith.divsi %9, %c1_1 : index
+ %11 = arith.cmpi sgt, %10, %c0 : index
+ %12 = arith.select %11, %10, %c0 : index
+ %13 = fir.load %3#0 : !fir.ref<i32>
+ %14 = fir.convert %13 : (i32) -> i64
+ %15 = fir.shape %12 : (index) -> !fir.shape<1>
+ %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+ %c100_i32 = arith.constant 100 : i32
+ %17:2 = hlfir.copy_in %16 to %0 : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+ %18 = fir.box_addr %17#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+ fir.call @_QFPsb(%18, %19#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+ hlfir.copy_out %0, %17#1 to %16 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+ hlfir.end_associate %19#1, %19#2 : !fir.ref<i32>, i1
+ return
+}
+
+// CHECK-LABEL: func.func private @_test_no_inline_copy_in(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
+// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
+// CHECK: %[[VAL_3:.*]] = arith.constant 100 : i32
+// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
+// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
+// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
+// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+// CHECK: %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+// CHECK: %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+// CHECK: fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+// CHECK: hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+// CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref<i32>, i1
+// CHECK: return
+// CHECK: }
>From f34d4d56fd9818aa8cedf2a3600ab2205f65e047 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Thu, 8 May 2025 15:15:56 +0000
Subject: [PATCH 3/4] Address Tom's review comments
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
.../HLFIR/Transforms/InlineHLFIRAssign.cpp | 41 +++++++++++--------
1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index 38c684eaceb7d..dc545ece8adff 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -158,16 +158,16 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
"CopyInOp's WasCopied has no uses");
// The copy out should always be present, either to actually copy or just
// deallocate memory.
- auto *copyOut =
- copyIn.getWasCopied().getUsers().begin().getCurrent().getUser();
+ auto copyOut = mlir::dyn_cast<hlfir::CopyOutOp>(
+ copyIn.getWasCopied().getUsers().begin().getCurrent().getUser());
- if (!mlir::isa<hlfir::CopyOutOp>(copyOut))
+ if (!copyOut)
return rewriter.notifyMatchFailure(copyIn,
"CopyInOp has no direct CopyOut");
// Only inline the copy_in when copy_out does not need to be done, i.e. in
// case of intent(in).
- if (::llvm::cast<hlfir::CopyOutOp>(copyOut).getVar())
+ if (copyOut.getVar())
return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out");
inputVariable =
@@ -175,7 +175,7 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
mlir::Type resultAddrType = copyIn.getCopiedIn().getType();
mlir::Value isContiguous =
builder.create<fir::IsContiguousBoxOp>(loc, inputVariable);
- auto results =
+ mlir::Operation::result_range results =
builder
.genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous,
/*withElseRegion=*/true)
@@ -195,11 +195,11 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(copyIn));
builder.setInsertionPointToStart(loopNest.body);
- auto elem = hlfir::getElementAt(loc, builder, inputVariable,
- loopNest.oneBasedIndices);
+ hlfir::Entity elem = hlfir::getElementAt(
+ loc, builder, inputVariable, loopNest.oneBasedIndices);
elem = hlfir::loadTrivialScalar(loc, builder, elem);
- auto tempElem = hlfir::getElementAt(loc, builder, temp,
- loopNest.oneBasedIndices);
+ hlfir::Entity tempElem = hlfir::getElementAt(
+ loc, builder, temp, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(loc, elem, tempElem);
builder.setInsertionPointAfter(loopNest.outerOp);
@@ -209,9 +209,9 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
if (mlir::isa<fir::BaseBoxType>(temp.getType())) {
result = temp;
} else {
- auto refTy =
+ fir::ReferenceType refTy =
fir::ReferenceType::get(temp.getElementOrSequenceType());
- auto refVal = builder.createConvert(loc, refTy, temp);
+ mlir::Value refVal = builder.createConvert(loc, refTy, temp);
result =
builder.create<fir::EmboxOp>(loc, resultAddrType, refVal);
}
@@ -221,25 +221,30 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
})
.getResults();
- auto addr = results[0];
- auto needsCleanup = results[1];
+ mlir::OpResult addr = results[0];
+ mlir::OpResult needsCleanup = results[1];
builder.setInsertionPoint(copyOut);
- builder.genIfOp(loc, {}, needsCleanup, false).genThen([&] {
+ builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] {
auto boxAddr = builder.create<fir::BoxAddrOp>(loc, addr);
- auto heapType = fir::HeapType::get(fir::BoxValue(addr).getBaseTy());
- auto heapVal = builder.createConvert(loc, heapType, boxAddr.getResult());
+ fir::HeapType heapType =
+ fir::HeapType::get(fir::BoxValue(addr).getBaseTy());
+ mlir::Value heapVal =
+ builder.createConvert(loc, heapType, boxAddr.getResult());
builder.create<fir::FreeMemOp>(loc, heapVal);
});
rewriter.eraseOp(copyOut);
- auto tempBox = copyIn.getTempBox();
+ mlir::Value tempBox = copyIn.getTempBox();
rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)});
// The TempBox is only needed for flang-rt calls which we're no longer
- // generating.
+ // generating. It should have no uses left at this stage.
+ if (!tempBox.getUses().empty())
+ return mlir::failure();
rewriter.eraseOp(tempBox.getDefiningOp());
+
return mlir::success();
}
>From cb09635ad4652d384ded06831ba6054fdad80299 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Thu, 22 May 2025 13:37:53 +0000
Subject: [PATCH 4/4] Separate copy_in inlining into its own pass, add flag
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
flang/include/flang/Optimizer/HLFIR/Passes.td | 4 +
.../Optimizer/HLFIR/Transforms/CMakeLists.txt | 1 +
.../HLFIR/Transforms/InlineHLFIRAssign.cpp | 122 ------------
.../HLFIR/Transforms/InlineHLFIRCopyIn.cpp | 180 ++++++++++++++++++
flang/lib/Optimizer/Passes/Pipelines.cpp | 5 +
flang/test/HLFIR/inline-hlfir-assign.fir | 144 --------------
flang/test/HLFIR/inline-hlfir-copy-in.fir | 146 ++++++++++++++
7 files changed, 336 insertions(+), 266 deletions(-)
create mode 100644 flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
create mode 100644 flang/test/HLFIR/inline-hlfir-copy-in.fir
diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index d445140118073..04d7aec5fe489 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -69,6 +69,10 @@ def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
let summary = "Inline hlfir.assign operations";
}
+def InlineHLFIRCopyIn : Pass<"inline-hlfir-copy-in"> {
+ let summary = "Inline hlfir.copy_in operations";
+}
+
def PropagateFortranVariableAttributes : Pass<"propagate-fortran-attrs"> {
let summary = "Propagate FortranVariableFlagsAttr attributes through HLFIR";
}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index d959428ebd203..cc74273d9c5d9 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_flang_library(HLFIRTransforms
ConvertToFIR.cpp
InlineElementals.cpp
InlineHLFIRAssign.cpp
+ InlineHLFIRCopyIn.cpp
LowerHLFIRIntrinsics.cpp
LowerHLFIROrderedAssignments.cpp
ScheduleOrderedAssignments.cpp
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index dc545ece8adff..6e209cce07ad4 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -13,7 +13,6 @@
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
-#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
@@ -128,126 +127,6 @@ class InlineHLFIRAssignConversion
}
};
-class InlineCopyInConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
-public:
- using mlir::OpRewritePattern<hlfir::CopyInOp>::OpRewritePattern;
-
- llvm::LogicalResult
- matchAndRewrite(hlfir::CopyInOp copyIn,
- mlir::PatternRewriter &rewriter) const override;
-};
-
-llvm::LogicalResult
-InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
- mlir::PatternRewriter &rewriter) const {
- fir::FirOpBuilder builder(rewriter, copyIn.getOperation());
- mlir::Location loc = copyIn.getLoc();
- hlfir::Entity inputVariable{copyIn.getVar()};
- if (!fir::isa_trivial(inputVariable.getFortranElementType()))
- return rewriter.notifyMatchFailure(copyIn,
- "CopyInOp's data type is not trivial");
-
- if (fir::isPointerType(inputVariable.getType()))
- return rewriter.notifyMatchFailure(
- copyIn, "CopyInOp's input variable is a pointer");
-
- // There should be exactly one user of WasCopied - the corresponding
- // CopyOutOp.
- if (copyIn.getWasCopied().getUses().empty())
- return rewriter.notifyMatchFailure(copyIn,
- "CopyInOp's WasCopied has no uses");
- // The copy out should always be present, either to actually copy or just
- // deallocate memory.
- auto copyOut = mlir::dyn_cast<hlfir::CopyOutOp>(
- copyIn.getWasCopied().getUsers().begin().getCurrent().getUser());
-
- if (!copyOut)
- return rewriter.notifyMatchFailure(copyIn,
- "CopyInOp has no direct CopyOut");
-
- // Only inline the copy_in when copy_out does not need to be done, i.e. in
- // case of intent(in).
- if (copyOut.getVar())
- return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out");
-
- inputVariable =
- hlfir::derefPointersAndAllocatables(loc, builder, inputVariable);
- mlir::Type resultAddrType = copyIn.getCopiedIn().getType();
- mlir::Value isContiguous =
- builder.create<fir::IsContiguousBoxOp>(loc, inputVariable);
- mlir::Operation::result_range results =
- builder
- .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous,
- /*withElseRegion=*/true)
- .genThen([&]() {
- mlir::Value falseVal = builder.create<mlir::arith::ConstantOp>(
- loc, builder.getI1Type(), builder.getBoolAttr(false));
- builder.create<fir::ResultOp>(
- loc, mlir::ValueRange{inputVariable, falseVal});
- })
- .genElse([&] {
- auto [temp, cleanup] =
- hlfir::createTempFromMold(loc, builder, inputVariable);
- mlir::Value shape = hlfir::genShape(loc, builder, inputVariable);
- llvm::SmallVector<mlir::Value> extents =
- hlfir::getIndexExtents(loc, builder, shape);
- hlfir::LoopNest loopNest = hlfir::genLoopNest(
- loc, builder, extents, /*isUnordered=*/true,
- flangomp::shouldUseWorkshareLowering(copyIn));
- builder.setInsertionPointToStart(loopNest.body);
- hlfir::Entity elem = hlfir::getElementAt(
- loc, builder, inputVariable, loopNest.oneBasedIndices);
- elem = hlfir::loadTrivialScalar(loc, builder, elem);
- hlfir::Entity tempElem = hlfir::getElementAt(
- loc, builder, temp, loopNest.oneBasedIndices);
- builder.create<hlfir::AssignOp>(loc, elem, tempElem);
- builder.setInsertionPointAfter(loopNest.outerOp);
-
- mlir::Value result;
- // Make sure the result is always a boxed array by boxing it
- // ourselves if need be.
- if (mlir::isa<fir::BaseBoxType>(temp.getType())) {
- result = temp;
- } else {
- fir::ReferenceType refTy =
- fir::ReferenceType::get(temp.getElementOrSequenceType());
- mlir::Value refVal = builder.createConvert(loc, refTy, temp);
- result =
- builder.create<fir::EmboxOp>(loc, resultAddrType, refVal);
- }
-
- builder.create<fir::ResultOp>(loc,
- mlir::ValueRange{result, cleanup});
- })
- .getResults();
-
- mlir::OpResult addr = results[0];
- mlir::OpResult needsCleanup = results[1];
-
- builder.setInsertionPoint(copyOut);
- builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] {
- auto boxAddr = builder.create<fir::BoxAddrOp>(loc, addr);
- fir::HeapType heapType =
- fir::HeapType::get(fir::BoxValue(addr).getBaseTy());
- mlir::Value heapVal =
- builder.createConvert(loc, heapType, boxAddr.getResult());
- builder.create<fir::FreeMemOp>(loc, heapVal);
- });
- rewriter.eraseOp(copyOut);
-
- mlir::Value tempBox = copyIn.getTempBox();
-
- rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)});
-
- // The TempBox is only needed for flang-rt calls which we're no longer
- // generating. It should have no uses left at this stage.
- if (!tempBox.getUses().empty())
- return mlir::failure();
- rewriter.eraseOp(tempBox.getDefiningOp());
-
- return mlir::success();
-}
-
class InlineHLFIRAssignPass
: public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
public:
@@ -261,7 +140,6 @@ class InlineHLFIRAssignPass
mlir::RewritePatternSet patterns(context);
patterns.insert<InlineHLFIRAssignConversion>(context);
- patterns.insert<InlineCopyInConversion>(context);
if (mlir::failed(mlir::applyPatternsGreedily(
getOperation(), std::move(patterns), config))) {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
new file mode 100644
index 0000000000000..40a6bb49581eb
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
@@ -0,0 +1,180 @@
+//===- InlineHLFIRCopyIn.cpp - Inline hlfir.copy_in ops ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Transform hlfir.copy_in array operations into loop nests performing element
+// per element assignments. For simplicity, the inlining is done for trivial
+// data types when the copy_in does not require a corresponding copy_out and
+// when the input array is not behind a pointer. This may change in the future.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace hlfir {
+#define GEN_PASS_DEF_INLINEHLFIRCOPYIN
+#include "flang/Optimizer/HLFIR/Passes.h.inc"
+} // namespace hlfir
+
+#define DEBUG_TYPE "inline-hlfir-copy-in"
+
+static llvm::cl::opt<bool> noInlineHLFIRCopyIn(
+ "no-inline-hlfir-copy-in",
+ llvm::cl::desc("Do not inline hlfir.copy_in operations"),
+ llvm::cl::init(false));
+
+namespace {
+class InlineCopyInConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
+public:
+ using mlir::OpRewritePattern<hlfir::CopyInOp>::OpRewritePattern;
+
+ llvm::LogicalResult
+ matchAndRewrite(hlfir::CopyInOp copyIn,
+ mlir::PatternRewriter &rewriter) const override;
+};
+
+llvm::LogicalResult
+InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
+ mlir::PatternRewriter &rewriter) const {
+ fir::FirOpBuilder builder(rewriter, copyIn.getOperation());
+ mlir::Location loc = copyIn.getLoc();
+ hlfir::Entity inputVariable{copyIn.getVar()};
+ if (!fir::isa_trivial(inputVariable.getFortranElementType()))
+ return rewriter.notifyMatchFailure(copyIn,
+ "CopyInOp's data type is not trivial");
+
+ if (fir::isPointerType(inputVariable.getType()))
+ return rewriter.notifyMatchFailure(
+ copyIn, "CopyInOp's input variable is a pointer");
+
+ // There should be exactly one user of WasCopied - the corresponding
+ // CopyOutOp.
+ if (copyIn.getWasCopied().getUses().empty())
+ return rewriter.notifyMatchFailure(copyIn,
+ "CopyInOp's WasCopied has no uses");
+ // The copy out should always be present, either to actually copy or just
+ // deallocate memory.
+ auto copyOut = mlir::dyn_cast<hlfir::CopyOutOp>(
+ copyIn.getWasCopied().getUsers().begin().getCurrent().getUser());
+
+ if (!copyOut)
+ return rewriter.notifyMatchFailure(copyIn,
+ "CopyInOp has no direct CopyOut");
+
+ // Only inline the copy_in when copy_out does not need to be done, i.e. in
+ // case of intent(in).
+ if (copyOut.getVar())
+ return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out");
+
+ inputVariable =
+ hlfir::derefPointersAndAllocatables(loc, builder, inputVariable);
+ mlir::Type resultAddrType = copyIn.getCopiedIn().getType();
+ mlir::Value isContiguous =
+ builder.create<fir::IsContiguousBoxOp>(loc, inputVariable);
+ mlir::Operation::result_range results =
+ builder
+ .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ mlir::Value falseVal = builder.create<mlir::arith::ConstantOp>(
+ loc, builder.getI1Type(), builder.getBoolAttr(false));
+ builder.create<fir::ResultOp>(
+ loc, mlir::ValueRange{inputVariable, falseVal});
+ })
+ .genElse([&] {
+ auto [temp, cleanup] =
+ hlfir::createTempFromMold(loc, builder, inputVariable);
+ mlir::Value shape = hlfir::genShape(loc, builder, inputVariable);
+ llvm::SmallVector<mlir::Value> extents =
+ hlfir::getIndexExtents(loc, builder, shape);
+ hlfir::LoopNest loopNest = hlfir::genLoopNest(
+ loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(copyIn));
+ builder.setInsertionPointToStart(loopNest.body);
+ hlfir::Entity elem = hlfir::getElementAt(
+ loc, builder, inputVariable, loopNest.oneBasedIndices);
+ elem = hlfir::loadTrivialScalar(loc, builder, elem);
+ hlfir::Entity tempElem = hlfir::getElementAt(
+ loc, builder, temp, loopNest.oneBasedIndices);
+ builder.create<hlfir::AssignOp>(loc, elem, tempElem);
+ builder.setInsertionPointAfter(loopNest.outerOp);
+
+ mlir::Value result;
+ // Make sure the result is always a boxed array by boxing it
+ // ourselves if need be.
+ if (mlir::isa<fir::BaseBoxType>(temp.getType())) {
+ result = temp;
+ } else {
+ fir::ReferenceType refTy =
+ fir::ReferenceType::get(temp.getElementOrSequenceType());
+ mlir::Value refVal = builder.createConvert(loc, refTy, temp);
+ result =
+ builder.create<fir::EmboxOp>(loc, resultAddrType, refVal);
+ }
+
+ builder.create<fir::ResultOp>(loc,
+ mlir::ValueRange{result, cleanup});
+ })
+ .getResults();
+
+ mlir::OpResult addr = results[0];
+ mlir::OpResult needsCleanup = results[1];
+
+ builder.setInsertionPoint(copyOut);
+ builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] {
+ auto boxAddr = builder.create<fir::BoxAddrOp>(loc, addr);
+ fir::HeapType heapType =
+ fir::HeapType::get(fir::BoxValue(addr).getBaseTy());
+ mlir::Value heapVal =
+ builder.createConvert(loc, heapType, boxAddr.getResult());
+ builder.create<fir::FreeMemOp>(loc, heapVal);
+ });
+ rewriter.eraseOp(copyOut);
+
+ mlir::Value tempBox = copyIn.getTempBox();
+
+ rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)});
+
+ // The TempBox is only needed for flang-rt calls which we're no longer
+ // generating. It should have no uses left at this stage.
+ if (!tempBox.getUses().empty())
+ return mlir::failure();
+ rewriter.eraseOp(tempBox.getDefiningOp());
+
+ return mlir::success();
+}
+
+class InlineHLFIRCopyInPass
+ : public hlfir::impl::InlineHLFIRCopyInBase<InlineHLFIRCopyInPass> {
+public:
+ void runOnOperation() override {
+ mlir::MLIRContext *context = &getContext();
+
+ mlir::GreedyRewriteConfig config;
+ // Prevent the pattern driver from merging blocks.
+ config.setRegionSimplificationLevel(
+ mlir::GreedySimplifyRegionLevel::Disabled);
+
+ mlir::RewritePatternSet patterns(context);
+ if (!noInlineHLFIRCopyIn) {
+ patterns.insert<InlineCopyInConversion>(context);
+ }
+
+ if (mlir::failed(mlir::applyPatternsGreedily(
+ getOperation(), std::move(patterns), config))) {
+ mlir::emitError(getOperation()->getLoc(),
+ "failure in hlfir.copy_in inlining");
+ signalPassFailure();
+ }
+ }
+};
+} // namespace
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 77751908e35be..1779623fddc5a 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -255,6 +255,11 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
pm, hlfir::createOptimizedBufferization);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
+
+ if (optLevel == llvm::OptimizationLevel::O3) {
+ addNestedPassToAllTopLevelOperations<PassConstructor>(
+ pm, hlfir::createInlineHLFIRCopyIn);
+ }
}
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
diff --git a/flang/test/HLFIR/inline-hlfir-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir
index df7681b9c5c16..f834e7971e3d5 100644
--- a/flang/test/HLFIR/inline-hlfir-assign.fir
+++ b/flang/test/HLFIR/inline-hlfir-assign.fir
@@ -353,147 +353,3 @@ func.func @_QPtest_expr_rhs(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.
// CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr<?x!fir.logical<4>>
// CHECK: return
// CHECK: }
-
-// Test inlining of hlfir.copy_in that does not require the array to be copied out
-func.func private @_test_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
- %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
- %1 = fir.dummy_scope : !fir.dscope
- %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
- %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
- %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
- %5 = fir.load %2#0 : !fir.ref<i32>
- %6 = fir.convert %5 : (i32) -> i64
- %c1 = arith.constant 1 : index
- %c1_0 = arith.constant 1 : index
- %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
- %c1_1 = arith.constant 1 : index
- %c0 = arith.constant 0 : index
- %8 = arith.subi %7#1, %c1 : index
- %9 = arith.addi %8, %c1_1 : index
- %10 = arith.divsi %9, %c1_1 : index
- %11 = arith.cmpi sgt, %10, %c0 : index
- %12 = arith.select %11, %10, %c0 : index
- %13 = fir.load %3#0 : !fir.ref<i32>
- %14 = fir.convert %13 : (i32) -> i64
- %15 = fir.shape %12 : (index) -> !fir.shape<1>
- %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
- %c100_i32 = arith.constant 100 : i32
- %17:2 = hlfir.copy_in %16 to %0 : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
- %18 = fir.box_addr %17#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
- %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
- fir.call @_QFPsb(%18, %19#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
- hlfir.copy_out %0, %17#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
- hlfir.end_associate %19#1, %19#2 : !fir.ref<i32>, i1
- return
-}
-
-// CHECK-LABEL: func.func private @_test_inline_copy_in(
-// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
-// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
-// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
-// CHECK: %[[VAL_3:.*]] = arith.constant true
-// CHECK: %[[VAL_4:.*]] = arith.constant false
-// CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
-// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
-// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_22:.*]]#0 : !fir.ref<i32>
-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
-// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_7:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
-// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index
-// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index
-// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
-// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_7:.*]]:%[[VAL_13:.*]]#1:%[[VAL_7:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
-// CHECK: %[[VAL_20:.*]] = fir.is_contiguous_box %[[VAL_19:.*]] whole : (!fir.box<!fir.array<?xf64>>) -> i1
-// CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box<!fir.array<?xf64>>, i1) {
-// CHECK: fir.result %[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box<!fir.array<?xf64>>, i1
-// CHECK: } else {
-// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array<?xf64>, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""}
-// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.heap<!fir.array<?xf64>>)
-// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered {
-// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
-// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26:.*]] : !fir.ref<f64>
-// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
-// CHECK: hlfir.assign %[[VAL_27:.*]] to %[[VAL_28:.*]] : f64, !fir.ref<f64>
-// CHECK: }
-// CHECK: fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box<!fir.array<?xf64>>, i1
-// CHECK: }
-// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
-// CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
-// CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
-// CHECK: fir.if %[[VAL_21:.*]]#1 {
-// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
-// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
-// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap<!fir.array<?xf64>>
-// CHECK: }
-// CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref<i32>, i1
-// CHECK: return
-// CHECK: }
-
-// Test not inlining of hlfir.copy_in that requires the array to be copied out
-func.func private @_test_no_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
- %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
- %1 = fir.dummy_scope : !fir.dscope
- %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
- %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
- %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
- %5 = fir.load %2#0 : !fir.ref<i32>
- %6 = fir.convert %5 : (i32) -> i64
- %c1 = arith.constant 1 : index
- %c1_0 = arith.constant 1 : index
- %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
- %c1_1 = arith.constant 1 : index
- %c0 = arith.constant 0 : index
- %8 = arith.subi %7#1, %c1 : index
- %9 = arith.addi %8, %c1_1 : index
- %10 = arith.divsi %9, %c1_1 : index
- %11 = arith.cmpi sgt, %10, %c0 : index
- %12 = arith.select %11, %10, %c0 : index
- %13 = fir.load %3#0 : !fir.ref<i32>
- %14 = fir.convert %13 : (i32) -> i64
- %15 = fir.shape %12 : (index) -> !fir.shape<1>
- %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
- %c100_i32 = arith.constant 100 : i32
- %17:2 = hlfir.copy_in %16 to %0 : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
- %18 = fir.box_addr %17#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
- %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
- fir.call @_QFPsb(%18, %19#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
- hlfir.copy_out %0, %17#1 to %16 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
- hlfir.end_associate %19#1, %19#2 : !fir.ref<i32>, i1
- return
-}
-
-// CHECK-LABEL: func.func private @_test_no_inline_copy_in(
-// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
-// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
-// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
-// CHECK: %[[VAL_3:.*]] = arith.constant 100 : i32
-// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
-// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
-// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref<i32>
-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
-// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
-// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
-// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
-// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
-// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
-// CHECK: %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
-// CHECK: %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
-// CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
-// CHECK: fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
-// CHECK: hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
-// CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref<i32>, i1
-// CHECK: return
-// CHECK: }
diff --git a/flang/test/HLFIR/inline-hlfir-copy-in.fir b/flang/test/HLFIR/inline-hlfir-copy-in.fir
new file mode 100644
index 0000000000000..7140e93f19979
--- /dev/null
+++ b/flang/test/HLFIR/inline-hlfir-copy-in.fir
@@ -0,0 +1,146 @@
+// Test inlining of hlfir.copy_in
+// RUN: fir-opt --inline-hlfir-copy-in %s | FileCheck %s
+
+// Test inlining of hlfir.copy_in that does not require the array to be copied out
+func.func private @_test_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+ %1 = fir.dummy_scope : !fir.dscope
+ %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+ %5 = fir.load %2#0 : !fir.ref<i32>
+ %6 = fir.convert %5 : (i32) -> i64
+ %c1 = arith.constant 1 : index
+ %c1_0 = arith.constant 1 : index
+ %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+ %c1_1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %8 = arith.subi %7#1, %c1 : index
+ %9 = arith.addi %8, %c1_1 : index
+ %10 = arith.divsi %9, %c1_1 : index
+ %11 = arith.cmpi sgt, %10, %c0 : index
+ %12 = arith.select %11, %10, %c0 : index
+ %13 = fir.load %3#0 : !fir.ref<i32>
+ %14 = fir.convert %13 : (i32) -> i64
+ %15 = fir.shape %12 : (index) -> !fir.shape<1>
+ %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+ %c100_i32 = arith.constant 100 : i32
+ %17:2 = hlfir.copy_in %16 to %0 : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+ %18 = fir.box_addr %17#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+ fir.call @_QFPsb(%18, %19#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+ hlfir.copy_out %0, %17#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1) -> ()
+ hlfir.end_associate %19#1, %19#2 : !fir.ref<i32>, i1
+ return
+}
+
+// CHECK-LABEL: func.func private @_test_inline_copy_in(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
+// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
+// CHECK: %[[VAL_3:.*]] = arith.constant true
+// CHECK: %[[VAL_4:.*]] = arith.constant false
+// CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
+// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_22:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
+// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_7:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index
+// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
+// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_7:.*]]:%[[VAL_13:.*]]#1:%[[VAL_7:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+// CHECK: %[[VAL_20:.*]] = fir.is_contiguous_box %[[VAL_19:.*]] whole : (!fir.box<!fir.array<?xf64>>) -> i1
+// CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box<!fir.array<?xf64>>, i1) {
+// CHECK: fir.result %[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box<!fir.array<?xf64>>, i1
+// CHECK: } else {
+// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array<?xf64>, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""}
+// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.heap<!fir.array<?xf64>>)
+// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered {
+// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26:.*]] : !fir.ref<f64>
+// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK: hlfir.assign %[[VAL_27:.*]] to %[[VAL_28:.*]] : f64, !fir.ref<f64>
+// CHECK: }
+// CHECK: fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box<!fir.array<?xf64>>, i1
+// CHECK: }
+// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+// CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+// CHECK: fir.if %[[VAL_21:.*]]#1 {
+// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref<!fir.array<?xf64>>) -> !fir.heap<!fir.array<?xf64>>
+// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap<!fir.array<?xf64>>
+// CHECK: }
+// CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref<i32>, i1
+// CHECK: return
+// CHECK: }
+
+// Test not inlining of hlfir.copy_in that requires the array to be copied out
+func.func private @_test_no_inline_copy_in(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}, %arg2: !fir.ref<i32> {fir.bindc_name = "j"}) {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+ %1 = fir.dummy_scope : !fir.dscope
+ %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+ %5 = fir.load %2#0 : !fir.ref<i32>
+ %6 = fir.convert %5 : (i32) -> i64
+ %c1 = arith.constant 1 : index
+ %c1_0 = arith.constant 1 : index
+ %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+ %c1_1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %8 = arith.subi %7#1, %c1 : index
+ %9 = arith.addi %8, %c1_1 : index
+ %10 = arith.divsi %9, %c1_1 : index
+ %11 = arith.cmpi sgt, %10, %c0 : index
+ %12 = arith.select %11, %10, %c0 : index
+ %13 = fir.load %3#0 : !fir.ref<i32>
+ %14 = fir.convert %13 : (i32) -> i64
+ %15 = fir.shape %12 : (index) -> !fir.shape<1>
+ %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+ %c100_i32 = arith.constant 100 : i32
+ %17:2 = hlfir.copy_in %16 to %0 : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+ %18 = fir.box_addr %17#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+ %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+ fir.call @_QFPsb(%18, %19#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+ hlfir.copy_out %0, %17#1 to %16 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+ hlfir.end_associate %19#1, %19#2 : !fir.ref<i32>, i1
+ return
+}
+
+// CHECK-LABEL: func.func private @_test_no_inline_copy_in(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "x"},
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "i"},
+// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "j"}) {
+// CHECK: %[[VAL_3:.*]] = arith.constant 100 : i32
+// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf64>>>
+// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box<!fir.array<?x?x?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?x?x?xf64>>, !fir.box<!fir.array<?x?x?xf64>>)
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64
+// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, index) -> (index, index, index)
+// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
+// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref<i32>
+// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64
+// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box<!fir.array<?x?x?xf64>>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf64>>
+// CHECK: %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.box<!fir.array<?xf64>>, i1)
+// CHECK: %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+// CHECK: fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>, !fir.ref<i32>) -> ()
+// CHECK: hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, i1, !fir.box<!fir.array<?xf64>>) -> ()
+// CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref<i32>, i1
+// CHECK: return
+// CHECK: }
More information about the flang-commits
mailing list