[flang-commits] [flang] [flang] introduce fir.copy to avoid load store of aggregates (PR #130289)
via flang-commits
flang-commits at lists.llvm.org
Fri Mar 7 06:50:58 PST 2025
https://github.com/jeanPerier created https://github.com/llvm/llvm-project/pull/130289
Introduce a FIR operation to do memcopy/memove of compile time constant size types.
This is to avoid requiring derived type copies to done with load/store which is badly supported in LLVM when the aggregate type is "big" (no threshold can easily be defined here, better to always avoid them for fir.type).
This was the root cause of the regressions caused by https://github.com/llvm/llvm-project/pull/114002 which introduced a load/store of fir.type<> which caused hand/asserts to fire in LLVM on several benchmarks.
See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type
We could directly use LLVM dialect memmove and memcopy operations, but I feel this would be intrusive for lowering tests because of the target dependent byte size, and also the conversions to llvm pointer types are noisy in the IR, so instead, this patch adds a simple op that will lower to these LLVM intrinsics and compute the byte size as late as possible.
I think it would be great to add it the FirAliasTagOpInterface, but the TBAA pass currently [does not support multiple operands](https://github.com/llvm/llvm-project/blob/1c1140c4cf7d4fde5de4b20c6b993bab9060d9fc/flang/lib/Optimizer/Transforms/AddAliasTags.cpp#L189), and I am not familiar enough with this to know how the TBAA from the source/destination should be merged (a relevant patch on the topic is [this llvm patch that preserved TBAA information in the load/store to memcopy pass](https://reviews.llvm.org/D108221?id=404635), it is probably a matter of doing something similar).
>From c54efad7205ad195535eb4244e00326c32293aed Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Fri, 7 Mar 2025 06:27:48 -0800
Subject: [PATCH] [flang] introduce fir.copy to avoid load store of aggregates
---
.../include/flang/Optimizer/Dialect/FIROps.td | 38 ++++++++++++++++++
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 40 ++++++++++++++++---
flang/lib/Optimizer/Dialect/FIROps.cpp | 23 +++++++++++
flang/test/Fir/copy-codegen.fir | 35 ++++++++++++++++
flang/test/Fir/fir-ops.fir | 9 +++++
flang/test/Fir/invalid.fir | 28 +++++++++++++
6 files changed, 168 insertions(+), 5 deletions(-)
create mode 100644 flang/test/Fir/copy-codegen.fir
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index c83c57186b46d..74e5e1fd2d6be 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -342,6 +342,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
}];
}
+def fir_CopyOp : fir_Op<"copy", []> {
+ let summary = "copy constant size memory";
+
+ let description = [{
+ Copy the memory from a source with compile time constant size to
+ a destination of the same type.
+
+ This is meant to be used for aggregate types where load and store
+ are not appropriate to make a copy because LLVM is not meant to
+ handle load and store of "big" aggregates.
+
+ Its "no_overlap" attribute allows indicating that the source and destination
+ are known to not overlap at compile time.
+
+ ```
+ !t =!fir.type<t{x:!fir.array<1000xi32>}>
+ fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
+ ```
+ TODO: add FirAliasTagOpInterface to carry TBAA.
+ }];
+
+ let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$source,
+ Arg<AnyReferenceLike, "", [MemWrite]>:$destination,
+ OptionalAttr<UnitAttr>:$no_overlap);
+
+ let builders = [OpBuilder<(ins "mlir::Value":$source,
+ "mlir::Value":$destination,
+ CArg<"bool", "false">:$no_overlap)>];
+
+ let assemblyFormat = [{
+ $source `to` $destination (`no_overlap` $no_overlap^)?
+ attr-dict `:` type(operands)
+ }];
+
+ let hasVerifier = 1;
+}
+
+
def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
let summary = [{
save an array, box, or record function result SSA-value to a memory location
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index a2743edd7844a..1d8372774ef9a 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3539,6 +3539,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
}
};
+/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove`
+struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
+ using FIROpConversion::FIROpConversion;
+
+ llvm::LogicalResult
+ matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const override {
+ mlir::Location loc = copy.getLoc();
+ mlir::Value llvmSource = adaptor.getSource();
+ mlir::Value llvmDestination = adaptor.getDestination();
+ mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
+ mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
+ mlir::Value copySize =
+ genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
+
+ mlir::LLVM::AliasAnalysisOpInterface newOp;
+ if (copy.getNoOverlap())
+ newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
+ loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
+ else
+ newOp = rewriter.create<mlir::LLVM::MemmoveOp>(
+ loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
+
+ // TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
+ attachTBAATag(newOp, copyTy, copyTy, nullptr);
+ rewriter.eraseOp(copy);
+ return mlir::success();
+ }
+};
+
namespace {
/// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
@@ -4142,11 +4172,11 @@ void fir::populateFIRToLLVMConversionPatterns(
BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
- DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
- EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
- ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
- FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
- InsertOnRangeOpConversion, IsPresentOpConversion,
+ CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
+ DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
+ EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
+ FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
+ GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 7efb733eb565c..059514cbb0183 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3940,6 +3940,29 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
build(builder, result, value, memref, {});
}
+//===----------------------------------------------------------------------===//
+// CopyOp
+//===----------------------------------------------------------------------===//
+
+void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+ mlir::Value source, mlir::Value destination,
+ bool noOverlap) {
+ mlir::UnitAttr noOverlapAttr =
+ noOverlap ? builder.getUnitAttr() : mlir::UnitAttr{};
+ build(builder, result, source, destination, noOverlapAttr);
+}
+
+llvm::LogicalResult fir::CopyOp::verify() {
+ mlir::Type sourceType = fir::unwrapRefType(getSource().getType());
+ mlir::Type destinationType = fir::unwrapRefType(getDestination().getType());
+ if (sourceType != destinationType)
+ return emitOpError("source and destination must have the same value type");
+ if (fir::hasDynamicSize(sourceType))
+ return emitOpError(
+ "source value type must have a compile time constant size");
+ return mlir::success();
+}
+
//===----------------------------------------------------------------------===//
// StringLitOp
//===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/copy-codegen.fir b/flang/test/Fir/copy-codegen.fir
new file mode 100644
index 0000000000000..eef1885c6a49c
--- /dev/null
+++ b/flang/test/Fir/copy-codegen.fir
@@ -0,0 +1,35 @@
+// Test fir.copy codegen.
+// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s
+
+!t=!fir.type<sometype{i:!fir.array<9xi32>}>
+
+module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {
+
+func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
+// CHECK-LABEL: llvm.func @test_copy_1(
+// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
+// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
+// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
+// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
+// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK: llvm.return
+// CHECK: }
+
+func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
+// CHECK-LABEL: llvm.func @test_copy_2(
+// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
+// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
+// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
+// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
+// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK: llvm.return
+// CHECK: }
+}
diff --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir
index 1bfcb3a9f3dc8..06b0bbbf0bd20 100644
--- a/flang/test/Fir/fir-ops.fir
+++ b/flang/test/Fir/fir-ops.fir
@@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
%0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
return %0 : i16
}
+
+// CHECK-LABEL: @test_copy(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
+func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
+ // CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
+ return
+}
diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir
index 7e3f9d6498412..0ef64b547f3e9 100644
--- a/flang/test/Fir/invalid.fir
+++ b/flang/test/Fir/invalid.fir
@@ -1018,3 +1018,31 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
%1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
return
}
+
+// -----
+
+!t=!fir.type<sometype{i:i32}>
+!t2=!fir.type<sometype2{j:i32}>
+func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
+ // expected-error at +1{{'fir.copy' op source and destination must have the same value type}}
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
+ return
+}
+
+// -----
+
+!t=!fir.type<sometype{i:i32}>
+func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
+ // expected-error at +1{{'fir.copy' op operand #0 must be any reference, but got '!fir.type<sometype{i:i32}>'}}
+ fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
+ return
+}
+
+// -----
+
+!t=!fir.array<?xi32>
+func.func @test_copy(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ // expected-error at +1{{'fir.copy' op source value type must have a compile time constant size}}
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
More information about the flang-commits
mailing list