[flang-commits] [flang] [flang] introduce fir.copy to avoid load store of aggregates (PR #130289)
via flang-commits
flang-commits at lists.llvm.org
Tue Mar 11 01:08:21 PDT 2025
https://github.com/jeanPerier updated https://github.com/llvm/llvm-project/pull/130289
>From c54efad7205ad195535eb4244e00326c32293aed Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Fri, 7 Mar 2025 06:27:48 -0800
Subject: [PATCH 1/3] [flang] introduce fir.copy to avoid load store of
aggregates
---
.../include/flang/Optimizer/Dialect/FIROps.td | 38 ++++++++++++++++++
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 40 ++++++++++++++++---
flang/lib/Optimizer/Dialect/FIROps.cpp | 23 +++++++++++
flang/test/Fir/copy-codegen.fir | 35 ++++++++++++++++
flang/test/Fir/fir-ops.fir | 9 +++++
flang/test/Fir/invalid.fir | 28 +++++++++++++
6 files changed, 168 insertions(+), 5 deletions(-)
create mode 100644 flang/test/Fir/copy-codegen.fir
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index c83c57186b46d..74e5e1fd2d6be 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -342,6 +342,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
}];
}
+def fir_CopyOp : fir_Op<"copy", []> {
+ let summary = "copy constant size memory";
+
+ let description = [{
+ Copy the memory from a source with compile time constant size to
+ a destination of the same type.
+
+ This is meant to be used for aggregate types where load and store
+ are not appropriate to make a copy because LLVM is not meant to
+ handle load and store of "big" aggregates.
+
+ Its "no_overlap" attribute allows indicating that the source and destination
+ are known to not overlap at compile time.
+
+ ```
+ !t =!fir.type<t{x:!fir.array<1000xi32>}>
+ fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
+ ```
+ TODO: add FirAliasTagOpInterface to carry TBAA.
+ }];
+
+ let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$source,
+ Arg<AnyReferenceLike, "", [MemWrite]>:$destination,
+ OptionalAttr<UnitAttr>:$no_overlap);
+
+ let builders = [OpBuilder<(ins "mlir::Value":$source,
+ "mlir::Value":$destination,
+ CArg<"bool", "false">:$no_overlap)>];
+
+ let assemblyFormat = [{
+ $source `to` $destination (`no_overlap` $no_overlap^)?
+ attr-dict `:` type(operands)
+ }];
+
+ let hasVerifier = 1;
+}
+
+
def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
let summary = [{
save an array, box, or record function result SSA-value to a memory location
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index a2743edd7844a..1d8372774ef9a 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3539,6 +3539,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
}
};
+/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove`
+struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
+ using FIROpConversion::FIROpConversion;
+
+ llvm::LogicalResult
+ matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const override {
+ mlir::Location loc = copy.getLoc();
+ mlir::Value llvmSource = adaptor.getSource();
+ mlir::Value llvmDestination = adaptor.getDestination();
+ mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
+ mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
+ mlir::Value copySize =
+ genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
+
+ mlir::LLVM::AliasAnalysisOpInterface newOp;
+ if (copy.getNoOverlap())
+ newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
+ loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
+ else
+ newOp = rewriter.create<mlir::LLVM::MemmoveOp>(
+ loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
+
+ // TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
+ attachTBAATag(newOp, copyTy, copyTy, nullptr);
+ rewriter.eraseOp(copy);
+ return mlir::success();
+ }
+};
+
namespace {
/// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
@@ -4142,11 +4172,11 @@ void fir::populateFIRToLLVMConversionPatterns(
BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
- DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
- EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
- ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
- FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
- InsertOnRangeOpConversion, IsPresentOpConversion,
+ CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
+ DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
+ EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
+ FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
+ GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 7efb733eb565c..059514cbb0183 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3940,6 +3940,29 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
build(builder, result, value, memref, {});
}
+//===----------------------------------------------------------------------===//
+// CopyOp
+//===----------------------------------------------------------------------===//
+
+void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+ mlir::Value source, mlir::Value destination,
+ bool noOverlap) {
+ mlir::UnitAttr noOverlapAttr =
+ noOverlap ? builder.getUnitAttr() : mlir::UnitAttr{};
+ build(builder, result, source, destination, noOverlapAttr);
+}
+
+llvm::LogicalResult fir::CopyOp::verify() {
+ mlir::Type sourceType = fir::unwrapRefType(getSource().getType());
+ mlir::Type destinationType = fir::unwrapRefType(getDestination().getType());
+ if (sourceType != destinationType)
+ return emitOpError("source and destination must have the same value type");
+ if (fir::hasDynamicSize(sourceType))
+ return emitOpError(
+ "source value type must have a compile time constant size");
+ return mlir::success();
+}
+
//===----------------------------------------------------------------------===//
// StringLitOp
//===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/copy-codegen.fir b/flang/test/Fir/copy-codegen.fir
new file mode 100644
index 0000000000000..eef1885c6a49c
--- /dev/null
+++ b/flang/test/Fir/copy-codegen.fir
@@ -0,0 +1,35 @@
+// Test fir.copy codegen.
+// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s
+
+!t=!fir.type<sometype{i:!fir.array<9xi32>}>
+
+module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {
+
+func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
+// CHECK-LABEL: llvm.func @test_copy_1(
+// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
+// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
+// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
+// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
+// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK: llvm.return
+// CHECK: }
+
+func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
+// CHECK-LABEL: llvm.func @test_copy_2(
+// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
+// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
+// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
+// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
+// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK: llvm.return
+// CHECK: }
+}
diff --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir
index 1bfcb3a9f3dc8..06b0bbbf0bd20 100644
--- a/flang/test/Fir/fir-ops.fir
+++ b/flang/test/Fir/fir-ops.fir
@@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
%0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
return %0 : i16
}
+
+// CHECK-LABEL: @test_copy(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
+func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
+ // CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
+ return
+}
diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir
index 7e3f9d6498412..0ef64b547f3e9 100644
--- a/flang/test/Fir/invalid.fir
+++ b/flang/test/Fir/invalid.fir
@@ -1018,3 +1018,31 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
%1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
return
}
+
+// -----
+
+!t=!fir.type<sometype{i:i32}>
+!t2=!fir.type<sometype2{j:i32}>
+func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
+ // expected-error at +1{{'fir.copy' op source and destination must have the same value type}}
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
+ return
+}
+
+// -----
+
+!t=!fir.type<sometype{i:i32}>
+func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
+ // expected-error at +1{{'fir.copy' op operand #0 must be any reference, but got '!fir.type<sometype{i:i32}>'}}
+ fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
+ return
+}
+
+// -----
+
+!t=!fir.array<?xi32>
+func.func @test_copy(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ // expected-error at +1{{'fir.copy' op source value type must have a compile time constant size}}
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
>From adb58ab7786a5645ccebe0deb58c892b5fa9e344 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Mon, 10 Mar 2025 03:59:14 -0700
Subject: [PATCH 2/3] add predicate to strengthen verifier
---
flang/include/flang/Optimizer/Dialect/FIROps.td | 10 ++++++++--
flang/include/flang/Optimizer/Dialect/FIRType.h | 7 +++++++
flang/test/Fir/invalid.fir | 15 ++++++++++++---
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 74e5e1fd2d6be..8325468c4b210 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred
def fir_BoxAddressOrValueType : Type<IsBoxAddressOrValueTypePred,
"fir.box or fir.class type or reference">;
+def RefOfConstantSizeAggregateTypePred
+ : CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">;
+def AnyRefOfConstantSizeAggregateType : TypeConstraint<
+ RefOfConstantSizeAggregateTypePred,
+ "a reference type to a constant size fir.array, fir.char, or fir.type">;
+
//===----------------------------------------------------------------------===//
// Memory SSA operations
//===----------------------------------------------------------------------===//
@@ -363,8 +369,8 @@ def fir_CopyOp : fir_Op<"copy", []> {
TODO: add FirAliasTagOpInterface to carry TBAA.
}];
- let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$source,
- Arg<AnyReferenceLike, "", [MemWrite]>:$destination,
+ let arguments = (ins Arg<AnyRefOfConstantSizeAggregateType, "", [MemRead]>:$source,
+ Arg<AnyRefOfConstantSizeAggregateType, "", [MemWrite]>:$destination,
OptionalAttr<UnitAttr>:$no_overlap);
let builders = [OpBuilder<(ins "mlir::Value":$source,
diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index 3d30f4e673682..76e0aa352bcd9 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) {
return t && mlir::isa<fir::BoxProcType>(t);
}
+inline bool isRefOfConstantSizeAggregateType(mlir::Type t) {
+ t = fir::dyn_cast_ptrEleTy(t);
+ return t &&
+ mlir::isa<fir::CharacterType, fir::RecordType, fir::SequenceType>(t) &&
+ !hasDynamicSize(t);
+}
+
/// Return a string representation of `ty`.
///
/// fir.array<10x10xf32> -> prefix_10x10xf32
diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir
index 0ef64b547f3e9..feb2cd55b3786 100644
--- a/flang/test/Fir/invalid.fir
+++ b/flang/test/Fir/invalid.fir
@@ -1033,7 +1033,7 @@ func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
!t=!fir.type<sometype{i:i32}>
func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
- // expected-error at +1{{'fir.copy' op operand #0 must be any reference, but got '!fir.type<sometype{i:i32}>'}}
+ // expected-error at +1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type<sometype{i:i32}>'}}
fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
return
}
@@ -1041,8 +1041,17 @@ func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
// -----
!t=!fir.array<?xi32>
-func.func @test_copy(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
- // expected-error at +1{{'fir.copy' op source value type must have a compile time constant size}}
+func.func @bad_copy_3(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ // expected-error at +1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<!fir.array<?xi32>>'}}
+ fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+ return
+}
+
+// -----
+
+!t=f32
+func.func @bad_copy_4(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+ // expected-error at +1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<f32>'}}
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
return
}
>From 420f3eda8af1c8becacea6ce251ad088c09ce42e Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Tue, 11 Mar 2025 01:08:02 -0700
Subject: [PATCH 3/3] remove duplicate check in verifier
---
flang/lib/Optimizer/Dialect/FIROps.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 059514cbb0183..203a72af61b92 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3957,9 +3957,6 @@ llvm::LogicalResult fir::CopyOp::verify() {
mlir::Type destinationType = fir::unwrapRefType(getDestination().getType());
if (sourceType != destinationType)
return emitOpError("source and destination must have the same value type");
- if (fir::hasDynamicSize(sourceType))
- return emitOpError(
- "source value type must have a compile time constant size");
return mlir::success();
}
More information about the flang-commits
mailing list