[flang-commits] [flang] 1ddf180 - [flang] introduce fir.copy to avoid load store of aggregates (#130289)

via flang-commits flang-commits at lists.llvm.org
Tue Mar 11 01:31:06 PDT 2025


Author: jeanPerier
Date: 2025-03-11T09:31:03+01:00
New Revision: 1ddf18057a5aa1ee7010ec262ccfc80c39b99bf6

URL: https://github.com/llvm/llvm-project/commit/1ddf18057a5aa1ee7010ec262ccfc80c39b99bf6
DIFF: https://github.com/llvm/llvm-project/commit/1ddf18057a5aa1ee7010ec262ccfc80c39b99bf6.diff

LOG: [flang] introduce fir.copy to avoid load store of aggregates (#130289)

Introduce a FIR operation to do memcpy/memmove of compile-time constant size types.

This is to avoid requiring derived type copies to be done with load/store
which is badly supported in LLVM when the aggregate type is "big" (no
threshold can easily be defined here, better to always avoid them for
fir.type).

This was the root cause of the regressions caused by #114002 which introduced a
load/store of fir.type<> which caused hangs/asserts to fire in LLVM on
several benchmarks.

See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type

Added: 
    flang/test/Fir/copy-codegen.fir

Modified: 
    flang/include/flang/Optimizer/Dialect/FIROps.td
    flang/include/flang/Optimizer/Dialect/FIRType.h
    flang/lib/Optimizer/CodeGen/CodeGen.cpp
    flang/lib/Optimizer/Dialect/FIROps.cpp
    flang/test/Fir/fir-ops.fir
    flang/test/Fir/invalid.fir

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index c83c57186b46d..8325468c4b210 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred
 def fir_BoxAddressOrValueType : Type<IsBoxAddressOrValueTypePred,
     "fir.box or fir.class type or reference">;
 
+def RefOfConstantSizeAggregateTypePred
+        : CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">;
+def AnyRefOfConstantSizeAggregateType : TypeConstraint<
+      RefOfConstantSizeAggregateTypePred,
+      "a reference type to a constant size fir.array, fir.char, or fir.type">;
+
 //===----------------------------------------------------------------------===//
 // Memory SSA operations
 //===----------------------------------------------------------------------===//
@@ -342,6 +348,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
   }];
 }
 
+def fir_CopyOp : fir_Op<"copy", []> {
+  let summary = "copy constant size memory";
+
+  let description = [{
+    Copy the memory from a source with compile time constant size to
+    a destination of the same type.
+
+    This is meant to be used for aggregate types where load and store
+    are not appropriate to make a copy because LLVM is not meant to
+    handle load and store of "big" aggregates.
+
+    Its "no_overlap" attribute allows indicating that the source and destination
+    are known to not overlap at compile time.
+
+    ```
+      !t =!fir.type<t{x:!fir.array<1000xi32>}>
+      fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
+    ```
+    TODO: add FirAliasTagOpInterface to carry TBAA.
+  }];
+
+  let arguments = (ins Arg<AnyRefOfConstantSizeAggregateType, "", [MemRead]>:$source,
+                       Arg<AnyRefOfConstantSizeAggregateType, "", [MemWrite]>:$destination,
+                       OptionalAttr<UnitAttr>:$no_overlap);
+
+  let builders = [OpBuilder<(ins "mlir::Value":$source,
+                                 "mlir::Value":$destination,
+                                  CArg<"bool", "false">:$no_overlap)>];
+
+  let assemblyFormat = [{
+    $source `to` $destination (`no_overlap` $no_overlap^)?
+    attr-dict `:` type(operands)
+  }];
+
+  let hasVerifier = 1;
+}
+
+
 def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
   let summary = [{
     save an array, box, or record function result SSA-value to a memory location

diff  --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index 3d30f4e673682..76e0aa352bcd9 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) {
   return t && mlir::isa<fir::BoxProcType>(t);
 }
 
+inline bool isRefOfConstantSizeAggregateType(mlir::Type t) {
+  t = fir::dyn_cast_ptrEleTy(t);
+  return t &&
+         mlir::isa<fir::CharacterType, fir::RecordType, fir::SequenceType>(t) &&
+         !hasDynamicSize(t);
+}
+
 /// Return a string representation of `ty`.
 ///
 /// fir.array<10x10xf32> -> prefix_10x10xf32

diff  --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index b5b2f393f6ca0..5548f5f981b12 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3545,6 +3545,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
   }
 };
 
+/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove`
+struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
+  using FIROpConversion::FIROpConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::Location loc = copy.getLoc();
+    mlir::Value llvmSource = adaptor.getSource();
+    mlir::Value llvmDestination = adaptor.getDestination();
+    mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
+    mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
+    mlir::Value copySize =
+        genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
+
+    mlir::LLVM::AliasAnalysisOpInterface newOp;
+    if (copy.getNoOverlap())
+      newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
+          loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
+    else
+      newOp = rewriter.create<mlir::LLVM::MemmoveOp>(
+          loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
+
+    // TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
+    attachTBAATag(newOp, copyTy, copyTy, nullptr);
+    rewriter.eraseOp(copy);
+    return mlir::success();
+  }
+};
+
 namespace {
 
 /// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
@@ -4148,11 +4178,11 @@ void fir::populateFIRToLLVMConversionPatterns(
       BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
       BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
       CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
-      DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
-      EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
-      ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
-      FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
-      InsertOnRangeOpConversion, IsPresentOpConversion,
+      CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
+      DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
+      EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
+      FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
+      GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
       LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
       NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
       SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,

diff  --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 7efb733eb565c..203a72af61b92 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3940,6 +3940,26 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
   build(builder, result, value, memref, {});
 }
 
+//===----------------------------------------------------------------------===//
+// CopyOp
+//===----------------------------------------------------------------------===//
+
+void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+                        mlir::Value source, mlir::Value destination,
+                        bool noOverlap) {
+  mlir::UnitAttr noOverlapAttr =
+      noOverlap ? builder.getUnitAttr() : mlir::UnitAttr{};
+  build(builder, result, source, destination, noOverlapAttr);
+}
+
+llvm::LogicalResult fir::CopyOp::verify() {
+  mlir::Type sourceType = fir::unwrapRefType(getSource().getType());
+  mlir::Type destinationType = fir::unwrapRefType(getDestination().getType());
+  if (sourceType != destinationType)
+    return emitOpError("source and destination must have the same value type");
+  return mlir::success();
+}
+
 //===----------------------------------------------------------------------===//
 // StringLitOp
 //===----------------------------------------------------------------------===//

diff  --git a/flang/test/Fir/copy-codegen.fir b/flang/test/Fir/copy-codegen.fir
new file mode 100644
index 0000000000000..eef1885c6a49c
--- /dev/null
+++ b/flang/test/Fir/copy-codegen.fir
@@ -0,0 +1,35 @@
+// Test fir.copy codegen.
+// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s
+
+!t=!fir.type<sometype{i:!fir.array<9xi32>}>
+
+module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {
+
+func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+  fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+  return
+}
+// CHECK-LABEL:   llvm.func @test_copy_1(
+// CHECK-SAME:                           %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
+// CHECK-SAME:                           %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
+// CHECK:           %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
+// CHECK:           %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
+// CHECK:           %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
+// CHECK:           "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK:           llvm.return
+// CHECK:         }
+
+func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+  fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
+  return
+}
+// CHECK-LABEL:   llvm.func @test_copy_2(
+// CHECK-SAME:                           %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
+// CHECK-SAME:                           %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
+// CHECK:           %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
+// CHECK:           %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
+// CHECK:           %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
+// CHECK:           "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK:           llvm.return
+// CHECK:         }
+}

diff  --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir
index 1bfcb3a9f3dc8..06b0bbbf0bd20 100644
--- a/flang/test/Fir/fir-ops.fir
+++ b/flang/test/Fir/fir-ops.fir
@@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
   %0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
   return %0 : i16
 }
+
+// CHECK-LABEL: @test_copy(
+// CHECK-SAME:    %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
+// CHECK-SAME:    %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
+func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
+  // CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
+  fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
+  return
+}

diff  --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir
index 7e3f9d6498412..feb2cd55b3786 100644
--- a/flang/test/Fir/invalid.fir
+++ b/flang/test/Fir/invalid.fir
@@ -1018,3 +1018,40 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
   %1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
   return
 }
+
+// -----
+
+!t=!fir.type<sometype{i:i32}>
+!t2=!fir.type<sometype2{j:i32}>
+func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
+  // expected-error@+1{{'fir.copy' op source and destination must have the same value type}}
+  fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
+  return
+}
+
+// -----
+
+!t=!fir.type<sometype{i:i32}>
+func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
+  // expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type<sometype{i:i32}>'}}
+  fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
+  return
+}
+
+// -----
+
+!t=!fir.array<?xi32>
+func.func @bad_copy_3(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+  // expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<!fir.array<?xi32>>'}}
+  fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+  return
+}
+
+// -----
+
+!t=f32
+func.func @bad_copy_4(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
+  // expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<f32>'}}
+  fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
+  return
+}


        


More information about the flang-commits mailing list