[flang-commits] [flang] [flang][fir] always use memcpy for fir.box (PR #113949)
via flang-commits
flang-commits at lists.llvm.org
Mon Oct 28 12:18:54 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: Asher Mancinelli (ashermancinelli)
<details>
<summary>Changes</summary>
@<!-- -->jeanPerier explained the importance of converting box loads and stores into `memcpy`s instead of aggregate loads and stores, and I'll do my best to explain it here.
* [(godbolt link) Example comparing opt transformations on memcpys vs aggregate load/stores](https://godbolt.org/z/be7xM83cG)
* LLVM can more effectively reason about memcpys compared to aggregate load/stores.
* This came up when others were discussing array descriptors for assumed-rank arrays passed to `bind(c)` subroutines, with the implication that the array descriptors are known to have lower bounds of 1 and that they are not pointer/allocatable types.
* [(godbolt link) Clang also uses memcpys so we should probably follow them, assuming the clang developers are generatign what they know Opt will handle more effectively.](https://godbolt.org/z/YT4x7387W)
* This currently may not help much without the `nocapture` attribute being propagated to function calls, but [it looks like someone may do this soon (discourse link)](https://discourse.llvm.org/t/applying-the-nocapture-attribute-to-reference-passed-arguments-in-fortran-subroutines/81401/23) or I can do this in a follow-up patch.
Note on test `flang/test/Fir/embox-char.fir`: it looks like the original test was auto-generated. I wasn't too sure which parts were especially important to test, so I regenerated the test. If we want the updated version to look more like the old version, I'll make those changes.
---
Patch is 57.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113949.diff
11 Files Affected:
- (modified) flang/lib/Optimizer/CodeGen/CodeGen.cpp (+20-39)
- (modified) flang/test/Fir/box.fir (+13-6)
- (modified) flang/test/Fir/convert-to-llvm-openmp-and-fir.fir (+2-2)
- (modified) flang/test/Fir/convert-to-llvm.fir (+15-13)
- (modified) flang/test/Fir/embox-char.fir (+121-118)
- (modified) flang/test/Fir/polymorphic.fir (+4-8)
- (modified) flang/test/Fir/tbaa.fir (+2-2)
- (modified) flang/test/Integration/OpenMP/private-global.f90 (+3-2)
- (modified) flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 (+1-2)
- (modified) flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 (+1-1)
- (modified) flang/test/Lower/allocatable-polymorphic.f90 (+6-12)
``````````diff
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index e6eeb0d5db4a84..4c8c56e0f21cef 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -2949,9 +2949,10 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
llvm::LogicalResult
matchAndRewrite(fir::LoadOp load, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
+
mlir::Type llvmLoadTy = convertObjectType(load.getType());
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(load.getType())) {
- // fir.box is a special case because it is considered as an ssa values in
+ // fir.box is a special case because it is considered an ssa value in
// fir, but it is lowered as a pointer to a descriptor. So
// fir.ref<fir.box> and fir.box end up being the same llvm types and
// loading a fir.ref<fir.box> is implemented as taking a snapshot of the
@@ -2960,30 +2961,17 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
mlir::Location loc = load.getLoc();
auto newBoxStorage =
genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter);
- // TODO: always generate llvm.memcpy, LLVM is better at optimizing it than
- // aggregate loads + stores.
- if (boxTy.isAssumedRank()) {
-
- TypePair boxTypePair{boxTy, llvmLoadTy};
- mlir::Value boxSize =
- computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
- auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
- loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
- if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
- memcpy.setTBAATags(*optionalTag);
- else
- attachTBAATag(memcpy, boxTy, boxTy, nullptr);
- } else {
- auto boxValue = rewriter.create<mlir::LLVM::LoadOp>(loc, llvmLoadTy,
- inputBoxStorage);
- if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
- boxValue.setTBAATags(*optionalTag);
- else
- attachTBAATag(boxValue, boxTy, boxTy, nullptr);
- auto storeOp =
- rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
- attachTBAATag(storeOp, boxTy, boxTy, nullptr);
- }
+
+ TypePair boxTypePair{boxTy, llvmLoadTy};
+ mlir::Value boxSize =
+ computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
+ auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
+ loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
+
+ if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
+ memcpy.setTBAATags(*optionalTag);
+ else
+ attachTBAATag(memcpy, boxTy, boxTy, nullptr);
rewriter.replaceOp(load, newBoxStorage);
} else {
auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
@@ -3227,20 +3215,13 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
mlir::LLVM::AliasAnalysisOpInterface newOp;
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(storeTy)) {
mlir::Type llvmBoxTy = lowerTy().convertBoxTypeAsStruct(boxTy);
- // fir.box value is actually in memory, load it first before storing it,
- // or do a memcopy for assumed-rank descriptors.
- if (boxTy.isAssumedRank()) {
- TypePair boxTypePair{boxTy, llvmBoxTy};
- mlir::Value boxSize =
- computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
- newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
- loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
- } else {
- auto val =
- rewriter.create<mlir::LLVM::LoadOp>(loc, llvmBoxTy, llvmValue);
- attachTBAATag(val, boxTy, boxTy, nullptr);
- newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, val, llvmMemref);
- }
+ // Always use memcpy because LLVM is not as effective at optimizing
+ // aggregate loads/stores as it is optimizing memcpy.
+ TypePair boxTypePair{boxTy, llvmBoxTy};
+ mlir::Value boxSize =
+ computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
+ newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
+ loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
} else {
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
}
diff --git a/flang/test/Fir/box.fir b/flang/test/Fir/box.fir
index 81a4d8bc13bf01..fd9fa1f2b3aabd 100644
--- a/flang/test/Fir/box.fir
+++ b/flang/test/Fir/box.fir
@@ -56,12 +56,14 @@ func.func @fa(%a : !fir.ref<!fir.array<100xf32>>) {
// CHECK-LABEL: define void @b1(
// CHECK-SAME: ptr %[[res:.*]], ptr %[[arg0:.*]], i64 %[[arg1:.*]])
func.func @b1(%arg0 : !fir.ref<!fir.char<1,?>>, %arg1 : index) -> !fir.box<!fir.char<1,?>> {
+ // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
// CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]]
// CHECK: insertvalue {{.*}} undef, i64 %[[size]], 1
// CHECK: insertvalue {{.*}} i32 20240719, 2
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
%x = fir.embox %arg0 typeparams %arg1 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.box<!fir.char<1,?>>
- // CHECK: store {{.*}}, ptr %[[res]]
+ // CHECK: store {{.*}}, ptr %[[alloca]]
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 24, i1 false)
return %x : !fir.box<!fir.char<1,?>>
}
@@ -71,11 +73,13 @@ func.func @b1(%arg0 : !fir.ref<!fir.char<1,?>>, %arg1 : index) -> !fir.box<!fir.
// CHECK-SAME: ptr %[[arg0:.*]], i64 %[[arg1:.*]])
func.func @b2(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,5>>>, %arg1 : index) -> !fir.box<!fir.array<?x!fir.char<1,5>>> {
%1 = fir.shape %arg1 : (index) -> !fir.shape<1>
+ // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
// CHECK: insertvalue {{.*}} { ptr undef, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), i32 20240719, i8 1, i8 40, i8 0, i8 0, {{.*}} }, i64 %[[arg1]], 7, 0, 1
// CHECK: insertvalue {{.*}} %{{.*}}, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), 7, 0, 2
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
%2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<?x!fir.char<1,5>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,5>>>
- // CHECK: store {{.*}}, ptr %[[res]]
+ // CHECK: store {{.*}}, ptr %[[alloca]]
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false)
return %2 : !fir.box<!fir.array<?x!fir.char<1,5>>>
}
@@ -84,6 +88,7 @@ func.func @b2(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,5>>>, %arg1 : index) ->
// CHECK-SAME: ptr %[[res:.*]], ptr %[[arg0:.*]], i64 %[[arg1:.*]], i64 %[[arg2:.*]])
func.func @b3(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,?>>>, %arg1 : index, %arg2 : index) -> !fir.box<!fir.array<?x!fir.char<1,?>>> {
%1 = fir.shape %arg2 : (index) -> !fir.shape<1>
+ // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
// CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]]
// CHECK: insertvalue {{.*}} i64 %[[size]], 1
// CHECK: insertvalue {{.*}} i32 20240719, 2
@@ -91,7 +96,8 @@ func.func @b3(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,?>>>, %arg1 : index, %ar
// CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
%2 = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
- // CHECK: store {{.*}}, ptr %[[res]]
+ // CHECK: store {{.*}}, ptr %[[alloca]]
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false)
return %2 : !fir.box<!fir.array<?x!fir.char<1,?>>>
}
@@ -101,6 +107,7 @@ func.func @b3(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,?>>>, %arg1 : index, %ar
func.func @b4(%arg0 : !fir.ref<!fir.array<7x!fir.char<1,?>>>, %arg1 : index) -> !fir.box<!fir.array<7x!fir.char<1,?>>> {
%c_7 = arith.constant 7 : index
%1 = fir.shape %c_7 : (index) -> !fir.shape<1>
+ // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
// CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]]
// CHECK: insertvalue {{.*}} i64 %[[size]], 1
// CHECK: insertvalue {{.*}} i32 20240719, 2
@@ -108,7 +115,8 @@ func.func @b4(%arg0 : !fir.ref<!fir.array<7x!fir.char<1,?>>>, %arg1 : index) ->
// CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
%x = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref<!fir.array<7x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<7x!fir.char<1,?>>>
- // CHECK: store {{.*}}, ptr %[[res]]
+ // CHECK: store {{.*}}, ptr %[[alloca]]
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false)
return %x : !fir.box<!fir.array<7x!fir.char<1,?>>>
}
@@ -117,8 +125,7 @@ func.func @b4(%arg0 : !fir.ref<!fir.array<7x!fir.char<1,?>>>, %arg1 : index) ->
// CHECK-SAME: ptr %[[arg0:.*]], ptr %[[arg1:.*]])
func.func @b5(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>, %arg1 : !fir.box<!fir.heap<!fir.array<?x?xf32>>>) {
fir.store %arg1 to %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
- // CHECK: %[[boxLoad:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] }, ptr %[[arg1]]
- // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] } %[[boxLoad]], ptr %[[arg0]]
+ // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %0, ptr %1, i32 72, i1 false)
return
}
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 335877e7c9a872..168526518865b4 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -799,8 +799,8 @@ func.func @_QPs(%arg0: !fir.ref<complex<f32>> {fir.bindc_name = "x"}) {
//CHECK: omp.parallel {
//CHECK: %[[CONST_1:.*]] = llvm.mlir.constant(1 : i32) : i32
//CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %[[CONST_1:.*]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
-//CHECK: %[[LOAD:.*]] = llvm.load %[[ALLOCA]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
-//CHECK: llvm.store %[[LOAD]], %[[ALLOCA_1]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+//CHECK: %[[SIZE:.*]] = llvm.mlir.constant(24 : i32) : i32
+//CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[ALLOCA]], %[[SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA_1]][0, 0] : (!llvm.ptr) -> !llvm.ptr
//CHECK: %[[LOAD_2:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> !llvm.ptr
//CHECK: omp.terminator
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 1182a0a10f218b..fa391fa6cc7a7d 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -862,8 +862,8 @@ func.func @test_store_box(%array : !fir.ref<!fir.box<!fir.array<?x?xf32>>>, %box
// CHECK-LABEL: llvm.func @test_store_box
// CHECK-SAME: (%[[arg0:.*]]: !llvm.ptr,
// CHECK-SAME: %[[arg1:.*]]: !llvm.ptr) {
-// CHECK-NEXT: %[[box_to_store:.*]] = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>
-// CHECK-NEXT: llvm.store %[[box_to_store]], %[[arg0]] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>, !llvm.ptr
+// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(72 : i32) : i32
+// CHECK-NEXT: "llvm.intr.memcpy"(%[[arg0]], %[[arg1]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
// CHECK-NEXT: llvm.return
// CHECK-NEXT: }
@@ -875,15 +875,17 @@ func.func @store_unlimited_polymorphic_box(%arg0 : !fir.class<none>, %arg1 : !fi
fir.store %arg3 to %arg3r : !fir.ref<!fir.box<!fir.array<?xnone>>>
return
}
-// CHECK-LABEL: llvm.func @store_unlimited_polymorphic_box(
-// CHECK: %[[VAL_8:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
-// CHECK: llvm.store %[[VAL_8]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
-// CHECK: %[[VAL_9:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>
-// CHECK: llvm.store %[[VAL_9]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
-// CHECK: %[[VAL_10:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
-// CHECK: llvm.store %[[VAL_10]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
-// CHECK: %[[VAL_11:.*]] = llvm.load %{{.*}}: !llvm.ptr
-// CHECK: llvm.store %[[VAL_11]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr
+// CHECK: llvm.func @store_unlimited_polymorphic_box(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: !llvm.ptr, %[[VAL_2:.*]]: !llvm.ptr, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: !llvm.ptr, %[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr) {
+// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(40 : i32) : i32
+// CHECK: "llvm.intr.memcpy"(%[[VAL_4]], %[[VAL_0]], %[[VAL_8]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(64 : i32) : i32
+// CHECK: "llvm.intr.memcpy"(%[[VAL_5]], %[[VAL_1]], %[[VAL_9]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(40 : i32) : i32
+// CHECK: "llvm.intr.memcpy"(%[[VAL_6]], %[[VAL_2]], %[[VAL_10]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32
+// CHECK: "llvm.intr.memcpy"(%[[VAL_7]], %[[VAL_3]], %[[VAL_11]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+// CHECK: llvm.return
+// CHECK: }
// -----
@@ -935,8 +937,8 @@ func.func @test_load_box(%addr : !fir.ref<!fir.box<!fir.array<10xf32>>>) {
// GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>
// AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>{{.*}} : (i32) -> !llvm.ptr<5>
// AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<5> to !llvm.ptr
-// CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr -> !llvm.struct<([[DESC_TYPE]])>
-// CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.struct<([[DESC_TYPE]])>, !llvm.ptr
+// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(48 : i32) : i32
+// CHECK-NEXT: "llvm.intr.memcpy"(%[[box_copy]], %[[arg0]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
// CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr) -> ()
// CHECK-NEXT: llvm.return
// CHECK-NEXT: }
diff --git a/flang/test/Fir/embox-char.fir b/flang/test/Fir/embox-char.fir
index bf8344dbb60fc8..efb069f96520d4 100644
--- a/flang/test/Fir/embox-char.fir
+++ b/flang/test/Fir/embox-char.fir
@@ -1,3 +1,10 @@
+// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+// The script is designed to make adding checks to
+// a test case fast, it is *not* designed to be authoritative
+// about what constitutes a good test! The CHECK should be
+// minimized and named to reflect the test intent.
+
// Test that the offset of the first element of the slice
// is computed in elements of the type used for the GEP
// computing the base of the slice.
@@ -10,42 +17,40 @@
// print *, x(2,:)
// end subroutine
-// CHECK-LABEL: llvm.func @test_char4(
-// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr,
-// CHECK-SAME: %[[VAL_1_SLICE_LB0:.*]]: i64, %[[VAL_2_SLICE_EX0:.*]]: i64, %[[VAL_3_SLICE_ST0:.*]]: i64, %[[VAL_4_SLICE_LB1:.*]]: i64, %[[VAL_5_SLICE_EX1:.*]]: i64, %[[VAL_6_SLICE_ST1:.*]]: i64) {
+// CHECK: llvm.func @test_char4(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: i64, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i64, %[[VAL_5:.*]]: i64, %[[VAL_6:.*]]: i64) {
// CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[VAL_12:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK: %[[VAL_13_WIDTH:.*]] = llvm.mlir.constant(4 : index) : i64
-// CHECK: %[[VAL_14:.*]] = llvm.load %[[VAL_0]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: llvm.store %[[VAL_14]], %[[VAL_10]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr
+// CHECK: %[[VAL_13:.*]] = llvm.mlir.constant(4 : index) : i64
+// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(72 : i32) : i32
+// CHECK: "llvm.intr.memcpy"(%[[VAL_10]], %[[VAL_0]], %[[VAL_14]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
// CHECK: %[[VAL_15:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: %[[VAL_16_BYTESIZE:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_16:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64
// CHECK: %[[VAL_17:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: %[[VAL_18_LB1:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_18:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64
// CHECK: %[[VAL_19:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: %[[VAL_20_EX1:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_20:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64
// CHECK: %[[VAL_21:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: %[[VAL_22_ST1:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_22:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64
// CHECK: %[[VAL_23:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: %[[VAL_24_BASEPTR:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr
+// CHECK: %[[VAL_24:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr
// CHECK: %[[VAL_25:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
-// CHECK: %[[VAL_26_LB0:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_26:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64
// CHECK: %[[VAL_27:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x arr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113949
More information about the flang-commits
mailing list