[llvm] e0f9cc7 - Revert "Reapply "Revert "[MemCpyOpt] implement multi BB stack-move optimization"""
Vitaly Buka via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 26 19:26:41 PDT 2023
Author: Vitaly Buka
Date: 2023-08-26T19:24:50-07:00
New Revision: e0f9cc71cb6f4eb2e1566177e05425c497759dc6
URL: https://github.com/llvm/llvm-project/commit/e0f9cc71cb6f4eb2e1566177e05425c497759dc6
DIFF: https://github.com/llvm/llvm-project/commit/e0f9cc71cb6f4eb2e1566177e05425c497759dc6.diff
LOG: Revert "Reapply "Revert "[MemCpyOpt] implement multi BB stack-move optimization"""
Breaks multiple bots, e.g. https://lab.llvm.org/buildbot/#/builders/19/builds/18856
This reverts commit ac0072602c9d01fc031a2d0acb418f7191480ef0.
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Other/new-pm-defaults.ll
llvm/test/Other/new-pm-lto-defaults.ll
llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
llvm/test/Transforms/MemCpyOpt/stack-move.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 3e8a5bf6a5bd56..d3e5e2591eea11 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -34,7 +34,6 @@ class MemMoveInst;
class MemorySSA;
class MemorySSAUpdater;
class MemSetInst;
-class PostDominatorTree;
class StoreInst;
class TargetLibraryInfo;
class Value;
@@ -44,7 +43,6 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
AAResults *AA = nullptr;
AssumptionCache *AC = nullptr;
DominatorTree *DT = nullptr;
- PostDominatorTree *PDT = nullptr;
MemorySSA *MSSA = nullptr;
MemorySSAUpdater *MSSAU = nullptr;
@@ -55,8 +53,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
// Glue for the old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA,
- AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT,
- MemorySSA *MSSA);
+ AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA);
private:
// Helper functions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2f89b194d4acac..6015bdf88a62ea 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -19,14 +19,12 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
@@ -1417,66 +1415,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
-using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
-/// Find the nearest Instruction or BasicBlock that dominates both I1 and
-/// I2.
-static InsertionPt findNearestCommonDominator(InsertionPt I1, InsertionPt I2,
- DominatorTree *DT) {
- auto GetParent = [](InsertionPt I) {
- if (auto *BB = dyn_cast<BasicBlock *>(I))
- return BB;
- return cast<Instruction *>(I)->getParent();
- };
- BasicBlock *BB1 = GetParent(I1);
- BasicBlock *BB2 = GetParent(I2);
- if (BB1 == BB2) {
- // BasicBlock InsertionPt means the terminator.
- if (isa<BasicBlock *>(I1))
- return I2;
- if (isa<BasicBlock *>(I2))
- return I1;
- return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I1
- : I2;
- }
- BasicBlock *DomBB = DT->findNearestCommonDominator(BB1, BB2);
- if (BB2 == DomBB)
- return I2;
- if (BB1 == DomBB)
- return I1;
- return DomBB;
-}
-
-/// Find the nearest Instruction or BasicBlock that post-dominates both I1 and
-/// I2.
-static InsertionPt findNearestCommonPostDominator(InsertionPt I1,
- InsertionPt I2,
- PostDominatorTree *PDT) {
- auto GetParent = [](InsertionPt I) {
- if (auto *BB = dyn_cast<BasicBlock *>(I))
- return BB;
- return cast<Instruction *>(I)->getParent();
- };
- BasicBlock *BB1 = GetParent(I1);
- BasicBlock *BB2 = GetParent(I2);
- if (BB1 == BB2) {
- // BasicBlock InsertionPt means the first non-phi instruction.
- if (isa<BasicBlock *>(I1))
- return I2;
- if (isa<BasicBlock *>(I2))
- return I1;
- return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I2
- : I1;
- }
- BasicBlock *PDomBB = PDT->findNearestCommonDominator(BB1, BB2);
- if (!PDomBB)
- return nullptr;
- if (BB2 == PDomBB)
- return I2;
- if (BB1 == PDomBB)
- return I1;
- return PDomBB;
-}
-
// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
@@ -1502,7 +1440,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return false;
}
- // Check that copy is full with static size.
+ // 1. Check that copy is full. Calculate the static size of the allocas to be
+ // merged, bail out if we can't.
const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
@@ -1516,16 +1455,19 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return false;
}
- if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
+ // 2-1. Check that src and dest are static allocas, which are not affected by
+ // stacksave/stackrestore.
+ if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
+ SrcAlloca->getParent() != Load->getParent() ||
+ SrcAlloca->getParent() != Store->getParent())
return false;
- // Check that src and dest are never captured, unescaped allocas. Also
- // find the nearest common dominator and postdominator for all users in
- // order to shrink wrap the lifetimes, and instructions with noalias metadata
- // to remove them.
+ // 2-2. Check that src and dest are never captured, unescaped allocas. Also
+ // collect lifetime markers first/last users in order to shrink wrap the
+ // lifetimes, and instructions with noalias metadata to remove them.
SmallVector<Instruction *, 4> LifetimeMarkers;
- InsertionPt Dom = nullptr, PDom = nullptr;
+ Instruction *FirstUser = nullptr, *LastUser = nullptr;
SmallSet<Instruction *, 4> NoAliasInstrs;
// Recursively track the user and check whether modified alias exist.
@@ -1563,13 +1505,12 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
case UseCaptureKind::NO_CAPTURE: {
auto *UI = cast<Instruction>(U.getUser());
- if (!Dom) {
- PDom = Dom = UI;
- } else {
- Dom = findNearestCommonDominator(Dom, UI, DT);
- if (PDom)
- PDom = findNearestCommonPostDominator(PDom, UI, PDT);
- }
+ if (DestAlloca->getParent() != UI->getParent())
+ return false;
+ if (!FirstUser || UI->comesBefore(FirstUser))
+ FirstUser = UI;
+ if (!LastUser || LastUser->comesBefore(UI))
+ LastUser = UI;
if (UI->isLifetimeStartOrEnd()) {
// We note the locations of these intrinsic calls so that we can
// delete them later if the optimization succeeds, this is safe
@@ -1593,64 +1534,37 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
- // size lifetime intrinsics. And collect modref inst for the reachability
- // check.
+ // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
+ // from the alloca to the Store.
ModRefInfo DestModRef = ModRefInfo::NoModRef;
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
- SmallVector<BasicBlock *, 8> ReachabilityWorklist;
auto DestModRefCallback = [&](Instruction *UI) -> bool {
// We don't care about the store itself.
if (UI == Store)
return true;
ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
- DestModRef |= Res;
- if (isModOrRefSet(Res)) {
- // Instructions reachability checks.
- // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
- // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
- if (UI->getParent() == Store->getParent()) {
- // The same block case is special because it's the only time we're
- // looking within a single block to see which instruction comes first.
- // Once we start looking at multiple blocks, the first instruction of
- // the block is reachable, so we only need to determine reachability
- // between whole blocks.
- BasicBlock *BB = UI->getParent();
-
- // If A comes before B, then B is definitively reachable from A.
- if (UI->comesBefore(Store))
- return false;
-
- // If the user's parent block is entry, no predecessor exists.
- if (BB->isEntryBlock())
- return true;
+ // FIXME: For multi-BB cases, we need to see reachability from it to
+ // store.
+ // Bailout if Dest may have any ModRef before Store.
+ if (UI->comesBefore(Store) && isModOrRefSet(Res))
+ return false;
+ DestModRef |= BAA.getModRefInfo(UI, DestLoc);
- // Otherwise, continue doing the normal per-BB CFG walk.
- ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
- } else {
- ReachabilityWorklist.push_back(UI->getParent());
- }
- }
return true;
};
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
return false;
- // Bailout if Dest may have any ModRef before Store.
- if (!ReachabilityWorklist.empty() &&
- isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
- nullptr, DT, nullptr))
- return false;
- // Check that, from after the Load to the end of the BB,
- // - if the dest has any Mod, src has no Ref, and
- // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
+ // 3. Check that, from after the Load to the end of the BB,
+ // 3-1. if the dest has any Mod, src has no Ref, and
+ // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
- // Any ModRef post-dominated by Load doesn't matter, also Load and Store
- // themselves can be ignored.
- if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
+ // Any ModRef before Load doesn't matter, also Load and Store can be
+ // ignored.
+ if (UI->comesBefore(Load) || UI == Load || UI == Store)
return true;
ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
if ((isModSet(DestModRef) && isRefSet(Res)) ||
@@ -1682,48 +1596,22 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
// Create a new lifetime start marker before the first user of src or alloca
// users.
- MemoryAccess *StartMA;
- if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
- Builder.SetInsertPoint(DomI->getParent(), DomI->getIterator());
- auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
- StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr,
- MSSA->getMemoryAccess(DomI));
- } else {
- auto *DomB = cast<BasicBlock *>(Dom);
- Builder.SetInsertPoint(DomB->getTerminator());
- auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
- StartMA = MSSAU->createMemoryAccessInBB(
- Start, nullptr, Start->getParent(), MemorySSA::BeforeTerminator);
- }
+ Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
+ auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+ auto *FirstMA = MSSA->getMemoryAccess(FirstUser);
+ auto *StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr, FirstMA);
MSSAU->insertDef(cast<MemoryDef>(StartMA), /*RenameUses=*/true);
// Create a new lifetime end marker after the last user of src or alloca
- // users. If there's no such postdominator, just don't bother; we could
- // create one at each exit block, but that'd be essentially semantically
- // meaningless.
- // If the PDom is the terminator (e.g. invoke), see the next immediate post
- // dominator.
- if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
- PDomI && PDomI->isTerminator()) {
- auto *IPDomNode = (*PDT)[PDomI->getParent()]->getIDom();
- PDom = IPDomNode ? IPDomNode->getBlock() : nullptr;
- }
- if (PDom) {
- MemoryAccess *EndMA;
- if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
- // If PDom is Instruction ptr, insert after it, because it's a user of
- // SrcAlloca.
- Builder.SetInsertPoint(PDomI->getParent(), ++PDomI->getIterator());
- auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
- EndMA = MSSAU->createMemoryAccessAfter(End, nullptr,
- MSSA->getMemoryAccess(PDomI));
- } else {
- auto *PDomB = cast<BasicBlock *>(PDom);
- Builder.SetInsertPoint(PDomB, PDomB->getFirstInsertionPt());
- auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
- EndMA = MSSAU->createMemoryAccessInBB(End, nullptr, End->getParent(),
- MemorySSA::Beginning);
- }
+ // users.
+ // FIXME: If the last user is the terminator for the bb, we can insert
+ // lifetime.end marker to the immidiate post-dominator, but currently do
+ // nothing.
+ if (!LastUser->isTerminator()) {
+ Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
+ auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
+ auto *LastMA = MSSA->getMemoryAccess(LastUser);
+ auto *EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, LastMA);
MSSAU->insertDef(cast<MemoryDef>(EndMA), /*RenameUses=*/true);
}
@@ -2111,10 +1999,9 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
+ bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
if (!MadeChange)
return PreservedAnalyses::all();
@@ -2126,14 +2013,12 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
AliasAnalysis *AA_, AssumptionCache *AC_,
- DominatorTree *DT_, PostDominatorTree *PDT_,
- MemorySSA *MSSA_) {
+ DominatorTree *DT_, MemorySSA *MSSA_) {
bool MadeChange = false;
TLI = TLI_;
AA = AA_;
AC = AC_;
DT = DT_;
- PDT = PDT_;
MSSA = MSSA_;
MemorySSAUpdater MSSAU_(MSSA_);
MSSAU = &MSSAU_;
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 016dfad98c69f7..5cb9a7f331a680 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -190,7 +190,6 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
-; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -202,7 +201,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O1-NEXT: Running pass: CoroElidePass
; CHECK-O-NEXT: Running pass: ADCEPass
-; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
+; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index c444197e0db706..1b64760e42c1ef 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -103,8 +103,8 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
-; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
+; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 79010c3eb80804..aa3b8e85749d95 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -125,7 +125,6 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
-; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -136,7 +135,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O1-NEXT: Running pass: CoroElidePass
; CHECK-O-NEXT: Running pass: ADCEPass
-; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
+; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 0697fb253b1fe0..7761ae84b3a125 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -157,7 +157,6 @@
; CHECK-O23SZ-NEXT: Running pass: GVNPass
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
-; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -168,7 +167,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O1-NEXT: Running pass: CoroElidePass
; CHECK-O-NEXT: Running pass: ADCEPass
-; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
+; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index b74c66bf8196e0..ec5c14ee0a7bf8 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -406,19 +406,24 @@ suc:
ret void
}
+; TODO: merge allocas for bb-separated, but logically straight
define void @multi_bb_memcpy(i1 %b) {
; CHECK-LABEL: define void @multi_bb_memcpy
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 4, i1 false)
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -440,17 +445,23 @@ bb1:
ret void
}
+; TODO: Merge alloca
define void @multi_bb_load_store(i1 %b) {
; CHECK-LABEL: define void @multi_bb_load_store
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -514,22 +525,28 @@ bb1:
ret void
}
+; TODO: merge allocas for multi basicblocks, s.t. all copy-dominated
+; uses are satisfy the condition.
define void @multi_bb_simple_br(i1 %b) {
; CHECK-LABEL: define void @multi_bb_simple_br
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -560,7 +577,7 @@ define void @multi_bb_dom_test0(i1 %b) {
; CHECK-LABEL: define void @multi_bb_dom_test0
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
@@ -569,8 +586,9 @@ define void @multi_bb_dom_test0(i1 %b) {
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 40, i32 50, i32 60 }, ptr [[SRC]], align 4
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -599,18 +617,20 @@ define void @multi_bb_pdom_test0(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test0
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -641,18 +661,19 @@ define void @multi_bb_pdom_test1(i1 %b) {
; CHECK-LABEL: define void @multi_bb_pdom_test1
; CHECK-SAME: (i1 [[B:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 42, [[BB0]] ], [ 41, [[BB1]] ]
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -676,23 +697,26 @@ bb2:
}
+; TODO: merge allocas for multi basicblock loop case.
define void @multi_bb_loop(i32 %n) {
; CHECK-LABEL: define void @multi_bb_loop
; CHECK-SAME: (i32 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4
; CHECK-NEXT: br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK: loop_body:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], 1
; CHECK-NEXT: store i32 [[NEW_I]], ptr [[SRC]], align 4
; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]]
; CHECK-NEXT: br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]]
; CHECK: loop_exit:
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
entry:
@@ -716,11 +740,14 @@ loop_exit:
ret void
}
+; TODO: merge allocas for multi basicblocks, s.t. some modref which is unreachable from copy exists.
define void @multi_bb_unreachable_modref(i1 %b0) {
; CHECK-LABEL: define void @multi_bb_unreachable_modref
; CHECK-SAME: (i1 [[B0:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]]
@@ -728,6 +755,9 @@ define void @multi_bb_unreachable_modref(i1 %b0) {
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: ret void
; CHECK: bb0:
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -749,21 +779,26 @@ bb0:
ret void
}
+; TODO: merge allocas for multi basicblocks, s.t. memcpy doesn't dominate the uses.
define void @multi_bb_non_dominated(i1 %b0, i1 %b1) {
; CHECK-LABEL: define void @multi_bb_non_dominated
; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: bb0:
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -780,9 +815,11 @@ bb0:
bb1:
%2 = call i32 @use_nocapture(ptr noundef nocapture %src)
+ ; %3 = call i32 @use_writeonly(ptr noundef nocapture %dest)
br label %bb2
bb2:
+ ; %4 = call i32 @use_nocapture(ptr noundef nocapture %src)
call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
ret void
More information about the llvm-commits mailing list