[llvm] ac00726 - Reapply "Revert "[MemCpyOpt] implement multi BB stack-move optimization""
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 26 14:50:28 PDT 2023
Author: khei4
Date: 2023-08-27T06:50:19+09:00
New Revision: ac0072602c9d01fc031a2d0acb418f7191480ef0
URL: https://github.com/llvm/llvm-project/commit/ac0072602c9d01fc031a2d0acb418f7191480ef0
DIFF: https://github.com/llvm/llvm-project/commit/ac0072602c9d01fc031a2d0acb418f7191480ef0.diff
LOG: Reapply "Revert "[MemCpyOpt] implement multi BB stack-move optimization""
This reverts commit 3bb32c61b2f1f5d14dd056dd198dc898dce5a44e.
Use InsertionPt for DT to handle non-memory access dominators
Differential Revision: https://reviews.llvm.org/D155406
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Other/new-pm-defaults.ll
llvm/test/Other/new-pm-lto-defaults.ll
llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
llvm/test/Transforms/MemCpyOpt/stack-move.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index d3e5e2591eea11..3e8a5bf6a5bd56 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -34,6 +34,7 @@ class MemMoveInst;
class MemorySSA;
class MemorySSAUpdater;
class MemSetInst;
+class PostDominatorTree;
class StoreInst;
class TargetLibraryInfo;
class Value;
@@ -43,6 +44,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
AAResults *AA = nullptr;
AssumptionCache *AC = nullptr;
DominatorTree *DT = nullptr;
+ PostDominatorTree *PDT = nullptr;
MemorySSA *MSSA = nullptr;
MemorySSAUpdater *MSSAU = nullptr;
@@ -53,7 +55,8 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
// Glue for the old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA,
- AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA);
+ AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT,
+ MemorySSA *MSSA);
private:
// Helper functions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6015bdf88a62ea..2f89b194d4acac 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -19,12 +19,14 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
@@ -1415,6 +1417,66 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
+using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
+/// Find the nearest Instruction or BasicBlock that dominates both I1 and
+/// I2.
+static InsertionPt findNearestCommonDominator(InsertionPt I1, InsertionPt I2,
+ DominatorTree *DT) {
+ auto GetParent = [](InsertionPt I) {
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
+ return BB;
+ return cast<Instruction *>(I)->getParent();
+ };
+ BasicBlock *BB1 = GetParent(I1);
+ BasicBlock *BB2 = GetParent(I2);
+ if (BB1 == BB2) {
+ // BasicBlock InsertionPt means the terminator.
+ if (isa<BasicBlock *>(I1))
+ return I2;
+ if (isa<BasicBlock *>(I2))
+ return I1;
+ return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I1
+ : I2;
+ }
+ BasicBlock *DomBB = DT->findNearestCommonDominator(BB1, BB2);
+ if (BB2 == DomBB)
+ return I2;
+ if (BB1 == DomBB)
+ return I1;
+ return DomBB;
+}
+
+/// Find the nearest Instruction or BasicBlock that post-dominates both I1 and
+/// I2.
+static InsertionPt findNearestCommonPostDominator(InsertionPt I1,
+ InsertionPt I2,
+ PostDominatorTree *PDT) {
+ auto GetParent = [](InsertionPt I) {
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
+ return BB;
+ return cast<Instruction *>(I)->getParent();
+ };
+ BasicBlock *BB1 = GetParent(I1);
+ BasicBlock *BB2 = GetParent(I2);
+ if (BB1 == BB2) {
+ // BasicBlock InsertionPt means the first non-phi instruction.
+ if (isa<BasicBlock *>(I1))
+ return I2;
+ if (isa<BasicBlock *>(I2))
+ return I1;
+ return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I2
+ : I1;
+ }
+ BasicBlock *PDomBB = PDT->findNearestCommonDominator(BB1, BB2);
+ if (!PDomBB)
+ return nullptr;
+ if (BB2 == PDomBB)
+ return I2;
+ if (BB1 == PDomBB)
+ return I1;
+ return PDomBB;
+}
+
// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
@@ -1440,8 +1502,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return false;
}
- // 1. Check that copy is full. Calculate the static size of the allocas to be
- // merged, bail out if we can't.
+ // Check that copy is full with static size.
const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
@@ -1455,19 +1516,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return false;
}
- // 2-1. Check that src and dest are static allocas, which are not affected by
- // stacksave/stackrestore.
- if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
- SrcAlloca->getParent() != Load->getParent() ||
- SrcAlloca->getParent() != Store->getParent())
+ if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
return false;
- // 2-2. Check that src and dest are never captured, unescaped allocas. Also
- // collect lifetime markers first/last users in order to shrink wrap the
- // lifetimes, and instructions with noalias metadata to remove them.
+ // Check that src and dest are never captured, unescaped allocas. Also
+ // find the nearest common dominator and postdominator for all users in
+ // order to shrink wrap the lifetimes, and instructions with noalias metadata
+ // to remove them.
SmallVector<Instruction *, 4> LifetimeMarkers;
- Instruction *FirstUser = nullptr, *LastUser = nullptr;
+ InsertionPt Dom = nullptr, PDom = nullptr;
SmallSet<Instruction *, 4> NoAliasInstrs;
// Recursively track the user and check whether modified alias exist.
@@ -1505,12 +1563,13 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
case UseCaptureKind::NO_CAPTURE: {
auto *UI = cast<Instruction>(U.getUser());
- if (DestAlloca->getParent() != UI->getParent())
- return false;
- if (!FirstUser || UI->comesBefore(FirstUser))
- FirstUser = UI;
- if (!LastUser || LastUser->comesBefore(UI))
- LastUser = UI;
+ if (!Dom) {
+ PDom = Dom = UI;
+ } else {
+ Dom = findNearestCommonDominator(Dom, UI, DT);
+ if (PDom)
+ PDom = findNearestCommonPostDominator(PDom, UI, PDT);
+ }
if (UI->isLifetimeStartOrEnd()) {
// We note the locations of these intrinsic calls so that we can
// delete them later if the optimization succeeds, this is safe
@@ -1534,37 +1593,64 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
- // from the alloca to the Store.
+ // Check that dest has no Mod/Ref, from the alloca to the Store, except full
+ // size lifetime intrinsics. And collect modref inst for the reachability
+ // check.
ModRefInfo DestModRef = ModRefInfo::NoModRef;
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
+ SmallVector<BasicBlock *, 8> ReachabilityWorklist;
auto DestModRefCallback = [&](Instruction *UI) -> bool {
// We don't care about the store itself.
if (UI == Store)
return true;
ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
- // FIXME: For multi-BB cases, we need to see reachability from it to
- // store.
- // Bailout if Dest may have any ModRef before Store.
- if (UI->comesBefore(Store) && isModOrRefSet(Res))
- return false;
- DestModRef |= BAA.getModRefInfo(UI, DestLoc);
+ DestModRef |= Res;
+ if (isModOrRefSet(Res)) {
+ // Instructions reachability checks.
+ // FIXME: an Instruction-level variant of isPotentiallyReachableFromMany in
+ // lib/Analysis/CFG.cpp (it currently only takes BasicBlocks) might be helpful.
+ if (UI->getParent() == Store->getParent()) {
+ // The same block case is special because it's the only time we're
+ // looking within a single block to see which instruction comes first.
+ // Once we start looking at multiple blocks, the first instruction of
+ // the block is reachable, so we only need to determine reachability
+ // between whole blocks.
+ BasicBlock *BB = UI->getParent();
+
+ // If A comes before B, then B is definitively reachable from A.
+ if (UI->comesBefore(Store))
+ return false;
+
+ // If the user's parent block is entry, no predecessor exists.
+ if (BB->isEntryBlock())
+ return true;
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
+ } else {
+ ReachabilityWorklist.push_back(UI->getParent());
+ }
+ }
return true;
};
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
return false;
+ // Bailout if Dest may have any ModRef before Store.
+ if (!ReachabilityWorklist.empty() &&
+ isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
+ nullptr, DT, nullptr))
+ return false;
- // 3. Check that, from after the Load to the end of the BB,
- // 3-1. if the dest has any Mod, src has no Ref, and
- // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
+ // Check that, for every user of src that the Load does not post-dominate,
+ // - if the dest has any Mod, src has no Ref, and
+ // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
- // Any ModRef before Load doesn't matter, also Load and Store can be
- // ignored.
- if (UI->comesBefore(Load) || UI == Load || UI == Store)
+ // Any ModRef post-dominated by Load doesn't matter, also Load and Store
+ // themselves can be ignored.
+ if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
return true;
ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
if ((isModSet(DestModRef) && isRefSet(Res)) ||
@@ -1596,22 +1682,48 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
// Create a new lifetime start marker before the first user of src or alloca
// users.
- Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
- auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
- auto *FirstMA = MSSA->getMemoryAccess(FirstUser);
- auto *StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr, FirstMA);
+ MemoryAccess *StartMA;
+ if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
+ Builder.SetInsertPoint(DomI->getParent(), DomI->getIterator());
+ auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+ StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr,
+ MSSA->getMemoryAccess(DomI));
+ } else {
+ auto *DomB = cast<BasicBlock *>(Dom);
+ Builder.SetInsertPoint(DomB->getTerminator());
+ auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+ StartMA = MSSAU->createMemoryAccessInBB(
+ Start, nullptr, Start->getParent(), MemorySSA::BeforeTerminator);
+ }
MSSAU->insertDef(cast<MemoryDef>(StartMA), /*RenameUses=*/true);
// Create a new lifetime end marker after the last user of src or alloca
- // users.
- // FIXME: If the last user is the terminator for the bb, we can insert
- // lifetime.end marker to the immidiate post-dominator, but currently do
- // nothing.
- if (!LastUser->isTerminator()) {
- Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
- auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
- auto *LastMA = MSSA->getMemoryAccess(LastUser);
- auto *EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, LastMA);
+ // users. If there's no such postdominator, just don't bother; we could
+ // create one at each exit block, but that'd be essentially semantically
+ // meaningless.
+ // If PDom is a terminator (e.g. invoke), fall back to the immediate
+ // post-dominator block of its parent block instead.
+ if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
+ PDomI && PDomI->isTerminator()) {
+ auto *IPDomNode = (*PDT)[PDomI->getParent()]->getIDom();
+ PDom = IPDomNode ? IPDomNode->getBlock() : nullptr;
+ }
+ if (PDom) {
+ MemoryAccess *EndMA;
+ if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
+ // If PDom is Instruction ptr, insert after it, because it's a user of
+ // SrcAlloca.
+ Builder.SetInsertPoint(PDomI->getParent(), ++PDomI->getIterator());
+ auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
+ EndMA = MSSAU->createMemoryAccessAfter(End, nullptr,
+ MSSA->getMemoryAccess(PDomI));
+ } else {
+ auto *PDomB = cast<BasicBlock *>(PDom);
+ Builder.SetInsertPoint(PDomB, PDomB->getFirstInsertionPt());
+ auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
+ EndMA = MSSAU->createMemoryAccessInBB(End, nullptr, End->getParent(),
+ MemorySSA::Beginning);
+ }
MSSAU->insertDef(cast<MemoryDef>(EndMA), /*RenameUses=*/true);
}
@@ -1999,9 +2111,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
+ bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
if (!MadeChange)
return PreservedAnalyses::all();
@@ -2013,12 +2126,14 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
AliasAnalysis *AA_, AssumptionCache *AC_,
- DominatorTree *DT_, MemorySSA *MSSA_) {
+ DominatorTree *DT_, PostDominatorTree *PDT_,
+ MemorySSA *MSSA_) {
bool MadeChange = false;
TLI = TLI_;
AA = AA_;
AC = AC_;
DT = DT_;
+ PDT = PDT_;
MSSA = MSSA_;
MemorySSAUpdater MSSAU_(MSSA_);
MSSAU = &MSSAU_;
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 5cb9a7f331a680..016dfad98c69f7 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -190,6 +190,7 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
+; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -201,7 +202,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O1-NEXT: Running pass: CoroElidePass
; CHECK-O-NEXT: Running pass: ADCEPass
-; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
+; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 1b64760e42c1ef..c444197e0db706 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -103,8 +103,8 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
-; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index aa3b8e85749d95..79010c3eb80804 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -125,6 +125,7 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
+; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -135,7 +136,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O1-NEXT: Running pass: CoroElidePass
; CHECK-O-NEXT: Running pass: ADCEPass
-; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
+; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 7761ae84b3a125..0697fb253b1fe0 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -157,6 +157,7 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
+; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -167,7 +168,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O1-NEXT: Running pass: CoroElidePass
; CHECK-O-NEXT: Running pass: ADCEPass
-; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
+; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index ec5c14ee0a7bf8..b74c66bf8196e0 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -406,24 +406,19 @@ suc:
ret void
}
-; TODO: merge allocas for bb-separated, but logically straight
define void @multi_bb_memcpy(i1 %b) {
; CHECK-LABEL: define void @multi_bb_memcpy
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 4, i1 false)
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -445,23 +440,17 @@ bb1:
ret void
}
-; TODO: Merge alloca
define void @multi_bb_load_store(i1 %b) {
; CHECK-LABEL: define void @multi_bb_load_store
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4
-; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -525,28 +514,22 @@ bb1:
ret void
}
-; TODO: merge allocas for multi basicblocks, s.t. all copy-dominated
-; uses are satisfy the condition.
define void @multi_bb_simple_br(i1 %b) {
; CHECK-LABEL: define void @multi_bb_simple_br
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -577,7 +560,7 @@ define void @multi_bb_dom_test0(i1 %b) {
; CHECK-LABEL: define void @multi_bb_dom_test0
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
@@ -586,9 +569,8 @@ define void @multi_bb_dom_test0(i1 %b) {
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 40, i32 50, i32 60 }, ptr [[SRC]], align 4
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -617,20 +599,18 @@ define void @multi_bb_pdom_test0(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test0
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -661,19 +641,18 @@ define void @multi_bb_pdom_test1(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test1
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 42, [[BB0]] ], [ 41, [[BB1]] ]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -697,26 +676,23 @@ bb2:
}
-; TODO: merge allocas for multi basicblock loop case.
define void @multi_bb_loop(i32 %n) {
; CHECK-LABEL: define void @multi_bb_loop
; CHECK-SAME: (i32 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4
; CHECK-NEXT: br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK: loop_body:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], 1
; CHECK-NEXT: store i32 [[NEW_I]], ptr [[SRC]], align 4
; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]]
; CHECK-NEXT: br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]]
; CHECK: loop_exit:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
entry:
@@ -740,14 +716,11 @@ loop_exit:
ret void
}
-; TODO: merge allocas for multi basicblocks, s.t. some modref which is unreachable from copy exists.
define void @multi_bb_unreachable_modref(i1 %b0) {
; CHECK-LABEL: define void @multi_bb_unreachable_modref
; CHECK-SAME: (i1 [[B0:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]]
@@ -755,9 +728,6 @@ define void @multi_bb_unreachable_modref(i1 %b0) {
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: ret void
; CHECK: bb0:
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -779,26 +749,21 @@ bb0:
ret void
}
-; TODO: merge allocas for multi basicblocks, s.t. memcpy doesn't dominate the uses.
define void @multi_bb_non_dominated(i1 %b0, i1 %b1) {
; CHECK-LABEL: define void @multi_bb_non_dominated
; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -815,11 +780,9 @@ bb0:
bb1:
%2 = call i32 @use_nocapture(ptr noundef nocapture %src)
- ; %3 = call i32 @use_writeonly(ptr noundef nocapture %dest)
br label %bb2
bb2:
- ; %4 = call i32 @use_nocapture(ptr noundef nocapture %src)
call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
ret void
More information about the llvm-commits
mailing list