[llvm] [MemCpyOpt] lifetime markers reconstruction for Stack-move optimization (PR #68990)
Kohei Asano via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 13 06:37:40 PDT 2023
https://github.com/khei4 updated https://github.com/llvm/llvm-project/pull/68990
>From f4781f132644f247ddb12beeda131c61c61735cb Mon Sep 17 00:00:00 2001
From: khei4 <kk.asano.luxy at gmail.com>
Date: Tue, 26 Sep 2023 15:24:17 +0900
Subject: [PATCH 2/2] [MemCpyOpt] reconstruction of lifetime markers, after
stack-move optimization.
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 99 +++++++++++++-
.../Transforms/MemCpyOpt/lifetime-missing.ll | 1 +
llvm/test/Transforms/MemCpyOpt/stack-move.ll | 122 ++++++++++++++++++
3 files changed, 216 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 688bcfa57158975..7e5cdd76ef514da 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1424,6 +1424,46 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
+namespace {
+
+using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
+/// Return the nearest insertion point (an Instruction, or a BasicBlock standing
+/// for its terminator) that dominates both I1 and I2; DT is used across blocks.
+static InsertionPt findNearestCommonDominator(InsertionPt I1, InsertionPt I2,
+ DominatorTree *DT) {
+ auto GetParent = [](InsertionPt I) { // Block that holds the insertion point.
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
+ return BB;
+ return cast<Instruction *>(I)->getParent();
+ };
+ BasicBlock *BB1 = GetParent(I1);
+ BasicBlock *BB2 = GetParent(I2);
+ if (BB1 == BB2) {
+ // A BasicBlock insertion point stands for that block's terminator.
+ if (isa<BasicBlock *>(I1))
+ return I2; // I2 is in the same block, so it is not after the terminator.
+ if (isa<BasicBlock *>(I2))
+ return I1; // Symmetric case: I1 is not after the terminator.
+ return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I1
+ : I2; // Two instructions in one block: pick the one that comes first.
+ }
+
+ // Deal with blocks that are unreachable from the entry up front: the
+ // block-level findNearestCommonDominator query below does not handle them.
+ if (!DT->isReachableFromEntry(BB2))
+ return I1; // BB2 is unreachable; keep I1 unchanged.
+ if (!DT->isReachableFromEntry(BB1))
+ return I2; // BB1 is unreachable; keep I2 unchanged.
+
+ BasicBlock *DomBB = DT->findNearestCommonDominator(BB1, BB2);
+ if (BB2 == DomBB)
+ return I2; // BB2 is the common dominator, so I2 keeps its exact position.
+ if (BB1 == DomBB)
+ return I1; // BB1 is the common dominator, so I1 keeps its exact position.
+ return DomBB; // A third block dominates both; this denotes its terminator.
+}
+
+} // namespace
// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
@@ -1466,13 +1506,27 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return false;
// Check that src and dest are never captured, unescaped allocas. Also
- // find the nearest common dominator and postdominator for all users in
+ // find the nearest common dominator and post-dominator for all users in
// order to shrink wrap the lifetimes, and instructions with noalias metadata
// to remove them.
SmallVector<Instruction *, 4> LifetimeMarkers;
+ InsertionPt StartPt = nullptr;
+ SmallSet<Instruction *, 4> EndPts;
SmallSet<Instruction *, 4> NoAliasInstrs;
bool SrcNotDom = false;
+ auto InsertIfNotPDom = [&](Instruction *I) { // Record I unless already covered.
+ SmallVector<Instruction *, 4> Covered; // End points post-dominated by I.
+ for (Instruction *EndPt : EndPts) {
+ if (PDT->dominates(EndPt, I))
+ return; // A recorded end point already post-dominates I.
+ if (PDT->dominates(I, EndPt))
+ Covered.push_back(EndPt); // Erase later; EndPts is being iterated.
+ }
+ for (Instruction *EndPt : Covered)
+ EndPts.erase(EndPt); // Previously `EndPt = I` only changed a loop-local copy.
+ EndPts.insert(I); // I supersedes every end point it post-dominates.
+ };
// Recursively track the user and check whether modified alias exist.
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
@@ -1514,6 +1568,13 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
Worklist.push_back(UI);
continue;
case UseCaptureKind::NO_CAPTURE: {
+ if (!StartPt) {
+ StartPt = UI;
+ EndPts.insert(UI);
+ } else {
+ StartPt = findNearestCommonDominator(StartPt, UI, DT);
+ InsertIfNotPDom(UI);
+ }
if (UI->isLifetimeStartOrEnd()) {
// We note the locations of these intrinsic calls so that we can
// delete them later if the optimization succeeds, this is safe
@@ -1623,12 +1684,38 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
// Drop metadata on the source alloca.
SrcAlloca->dropUnknownNonDebugMetadata();
- // TODO: Reconstruct merged lifetime markers.
- // Remove all other lifetime markers. if the original lifetime intrinsics
- // exists.
+ // Reconstruct the lifetime markers for the merged alloca:
+ // 1. Insert a new lifetime.start at the point that dominates all src and dest uses.
+ // 2. Remove the original markers, except those recorded in EndPts (possibly the last lifetime.end).
if (!LifetimeMarkers.empty()) {
- for (Instruction *I : LifetimeMarkers)
- eraseInstruction(I);
+ LLVMContext &C = SrcAlloca->getContext();
+ IRBuilder<> Builder(C);
+ ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), -1);
+ if (!Size.isScalable())
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
+ // Create a new lifetime.start marker at StartPt, the insertion point computed
+ // as the nearest common dominator of the tracked src and dest alloca users.
+ MemoryAccess *StartMA;
+ if (auto *DomI = dyn_cast_if_present<Instruction *>(StartPt)) {
+ Builder.SetInsertPoint(DomI->getParent(), DomI->getIterator());
+ auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+ StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr,
+ MSSA->getMemoryAccess(DomI));
+ } else {
+ auto *DomB = cast<BasicBlock *>(StartPt);
+ Builder.SetInsertPoint(DomB->getTerminator());
+ auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+ StartMA = MSSAU->createMemoryAccessInBB(
+ Start, nullptr, Start->getParent(), MemorySSA::BeforeTerminator);
+ }
+ MSSAU->insertDef(cast<MemoryDef>(StartMA), /*RenameUses=*/true);
+
+ // Remove all original lifetime markers except those recorded in EndPts,
+ // which are left in place.
+ for (Instruction *I : LifetimeMarkers) {
+ if (EndPts.find(I) == EndPts.end())
+ eraseInstruction(I);
+ }
}
// As this transformation can cause memory accesses that didn't previously
diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
index 0626f09702f7e21..e3190cee45d2da4 100644
--- a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
+++ b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
@@ -15,6 +15,7 @@ define void @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]])
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false)
; CHECK-NEXT: [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 6089c0a4d7cf507..998b742e480bb73 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -24,9 +24,11 @@ declare i32 @use_writeonly(ptr noundef) memory(write)
define void @basic_memcpy() {
; CHECK-LABEL: define void @basic_memcpy() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -67,9 +69,11 @@ define i32 @use_not_dominated_by_src_alloca() {
define void @basic_memmove() {
; CHECK-LABEL: define void @basic_memmove() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -92,9 +96,11 @@ define void @basic_memmove() {
define void @load_store() {
; CHECK-LABEL: define void @load_store() {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -118,9 +124,11 @@ define void @load_store_scalable(<vscale x 4 x i32> %x) {
; CHECK-LABEL: define void @load_store_scalable
; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[SRC]])
; CHECK-NEXT: store <vscale x 4 x i32> [[X]], ptr [[SRC]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca <vscale x 4 x i32>
@@ -144,9 +152,11 @@ define void @load_store_scalable(<vscale x 4 x i32> %x) {
define void @align_up() {
; CHECK-LABEL: define void @align_up() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -169,10 +179,12 @@ define void @align_up() {
define void @remove_extra_lifetime_intrinsics() {
; CHECK-LABEL: define void @remove_extra_lifetime_intrinsics() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -221,11 +233,13 @@ define void @no_lifetime() {
define void @alias_no_mod() {
; CHECK-LABEL: define void @alias_no_mod() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[SRC_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -258,9 +272,11 @@ define void @alias_no_mod() {
define void @remove_scoped_noalias() {
; CHECK-LABEL: define void @remove_scoped_noalias() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -282,9 +298,11 @@ define void @remove_scoped_noalias() {
define void @remove_alloca_metadata() {
; CHECK-LABEL: define void @remove_alloca_metadata() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4, !annotation !3
@@ -307,9 +325,11 @@ define void @remove_alloca_metadata() {
define void @noalias_on_lifetime() {
; CHECK-LABEL: define void @noalias_on_lifetime() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]), !noalias !0
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -331,9 +351,11 @@ define void @noalias_on_lifetime() {
define void @src_ref_dest_ref_after_copy() {
; CHECK-LABEL: define void @src_ref_dest_ref_after_copy() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -355,9 +377,11 @@ define void @src_ref_dest_ref_after_copy() {
define void @src_mod_dest_mod_after_copy() {
; CHECK-LABEL: define void @src_mod_dest_mod_after_copy() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -378,6 +402,7 @@ define void @src_mod_dest_mod_after_copy() {
define void @avoid_memory_use_last_user_crash() {
; CHECK-LABEL: define void @avoid_memory_use_last_user_crash() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: ret void
@@ -396,6 +421,7 @@ define void @avoid_memory_use_last_user_crash() {
define void @terminator_lastuse() personality i32 0 {
; CHECK-LABEL: define void @terminator_lastuse() personality i32 0 {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]])
@@ -430,6 +456,7 @@ define void @multi_bb_memcpy(i1 %b) {
; CHECK-LABEL: define void @multi_bb_memcpy
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: br label [[BB0:%.*]]
@@ -437,6 +464,7 @@ define void @multi_bb_memcpy(i1 %b) {
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -462,11 +490,13 @@ define void @multi_bb_load_store(i1 %b) {
; CHECK-LABEL: define void @multi_bb_load_store
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -534,6 +564,7 @@ define void @multi_bb_simple_br(i1 %b) {
; CHECK-LABEL: define void @multi_bb_simple_br
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
@@ -544,6 +575,7 @@ define void @multi_bb_simple_br(i1 %b) {
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -574,6 +606,7 @@ define void @multi_bb_dom_test0(i1 %b) {
; CHECK-LABEL: define void @multi_bb_dom_test0
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
@@ -657,6 +690,7 @@ define void @multi_bb_pdom_test0(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test0
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
@@ -697,6 +731,7 @@ define void @multi_bb_pdom_test1(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test1
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
@@ -735,6 +770,7 @@ define void @multi_bb_pdom_test2(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test2
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: ret void
@@ -763,12 +799,49 @@ unr2:
}
+define void @retain_last_end(i1 %b0) { ; The copy and both lifetime.end calls sit in bb0, one of two returning blocks.
+; CHECK-LABEL: define void @retain_last_end
+; CHECK-SAME: (i1 [[B0:%.*]]) {
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: ret void
+; CHECK: bb0:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca %struct.Foo, align 4
+ %dest = alloca %struct.Foo, align 4
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+ br i1 %b0, label %bb0, label %exit
+
+exit:
+ %2 = call i32 @use_nocapture(ptr noundef nocapture %src)
+ ret void
+
+bb0:
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false) ; absent from the expected output
+ call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+ call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) ; the expected output keeps a single lifetime.end in bb0
+ ret void
+}
+
define void @multi_bb_loop(i32 %n) {
; CHECK-LABEL: define void @multi_bb_loop
; CHECK-SAME: (i32 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4
; CHECK-NEXT: br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK: loop_body:
@@ -801,10 +874,56 @@ loop_exit:
ret void
}
+define void @dont_insert_end_loop(i32 %n) { ; The copy is inside a loop and the input has no lifetime.end; the expected output has none either.
+; CHECK-LABEL: define void @dont_insert_end_loop
+; CHECK-SAME: (i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4
+; CHECK-NEXT: br label [[PRE:%.*]]
+; CHECK: pre:
+; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
+; CHECK: loop_body:
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT: [[NEW_V:%.*]] = add i32 [[V]], 1
+; CHECK-NEXT: store i32 [[NEW_V]], ptr [[SRC]], align 4
+; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_V]], [[N]]
+; CHECK-NEXT: br i1 [[IGTN]], label [[LOOP_EXIT:%.*]], label [[PRE]]
+; CHECK: loop_exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %nlt1 = icmp slt i32 %n, 1
+ %src = alloca %struct.Foo, align 8
+ %dest = alloca %struct.Foo, align 8
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+ store %struct.Foo { i32 0, i32 1, i32 42 }, ptr %src
+ br label %pre
+
+pre:
+ br label %loop_body
+
+loop_body:
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 12, i1 false) ; absent from the expected output
+ %v = load i32, ptr %src
+ %new_v = add i32 %v, 1
+ store i32 %new_v, ptr %src
+ %igtn = icmp sgt i32 %new_v, %n
+ br i1 %igtn, label %loop_exit, label %pre
+
+loop_exit:
+ ret void
+}
+
define void @multi_bb_unreachable_modref(i1 %b0) {
; CHECK-LABEL: define void @multi_bb_unreachable_modref
; CHECK-SAME: (i1 [[B0:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]]
@@ -812,6 +931,7 @@ define void @multi_bb_unreachable_modref(i1 %b0) {
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: ret void
; CHECK: bb0:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -837,6 +957,7 @@ define void @multi_bb_non_dominated(i1 %b0, i1 %b1) {
; CHECK-LABEL: define void @multi_bb_non_dominated
; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]]
@@ -846,6 +967,7 @@ define void @multi_bb_non_dominated(i1 %b0, i1 %b1) {
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
More information about the llvm-commits
mailing list