[llvm] b02d349 - Revert "Revert "[MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas""
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 15 00:49:30 PDT 2023
Author: khei4
Date: 2023-07-15T16:27:38+09:00
New Revision: b02d349cbfaa81c9bbc928c4de46b12d976c1882
URL: https://github.com/llvm/llvm-project/commit/b02d349cbfaa81c9bbc928c4de46b12d976c1882
DIFF: https://github.com/llvm/llvm-project/commit/b02d349cbfaa81c9bbc928c4de46b12d976c1882.diff
LOG: Revert "Revert "[MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas""
This reverts commit 36a6eb7d12a9f827bf3d5d4e5fdc68b8a62807b2.
[MemCpyOpt] check that load/store and dest/src alloca are all in the same bb
Differential Revision: https://reviews.llvm.org/D153453
Co-authored-by: serge-sans-paille <sguelton at mozilla.com>
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Transforms/MemCpyOpt/callslot.ll
llvm/test/Transforms/MemCpyOpt/stack-move.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 9ce64623e25b27..d3e5e2591eea11 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -20,6 +20,7 @@
namespace llvm {
class AAResults;
+class AllocaInst;
class BatchAAResults;
class AssumptionCache;
class CallBase;
@@ -77,6 +78,9 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
+ bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
+ AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
+ uint64_t Size, BatchAAResults &BAA);
void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 00937e0d734ab3..1dac4a285f20da 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -69,6 +69,7 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
+STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
namespace {
@@ -730,6 +731,23 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
return true;
}
+ // If this is a load-store pair from a stack slot to a stack slot, we
+ // might be able to perform the stack-move optimization just as we do for
+ // memcpys from an alloca to an alloca.
+ if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
+ if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
+ if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
+ DL.getTypeStoreSize(T), BAA)) {
+ // Avoid invalidating the iterator.
+ BBI = SI->getNextNonDebugInstruction()->getIterator();
+ eraseInstruction(SI);
+ eraseInstruction(LI);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+
return false;
}
@@ -1408,6 +1426,212 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
+// Attempts to optimize the pattern whereby memory is copied from an alloca to
+// another alloca, where the two allocas don't have conflicting mod/ref. If
+// successful, the two allocas are merged into one and true is returned; the
+// caller is then expected to delete the transfer (Load/Store) itself. This
+// pattern is generated frequently in Rust, due to the ubiquity of move
+// operations in that language.
+//
+// Once we determine that the optimization is safe to perform, we replace all
+// uses of the destination alloca with the source alloca. We also "shrink wrap"
+// the lifetime markers of the single merged alloca to before the first use
+// and after the last use. This is safe only because we restrict the scope of
+// this optimization to allocas that aren't captured.
+bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
+ AllocaInst *DestAlloca,
+ AllocaInst *SrcAlloca, uint64_t Size,
+ BatchAAResults &BAA) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
+ << *Store << "\n");
+
+ // Make sure the two allocas are in the same address space.
+ if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
+ return false;
+ }
+
+ // 1. Check that copy is full. Calculate the static size of the allocas to be
+ // merged, bail out if we can't.
+ const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
+ std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
+ if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
+ return false;
+ }
+ std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
+ if (!DestSize || DestSize->isScalable() ||
+ Size != DestSize->getFixedValue()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+ return false;
+ }
+
+ // 2-1. Check that src and dest are static allocas, which are not affected by
+ // stacksave/stackrestore.
+ if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
+ SrcAlloca->getParent() != Load->getParent() ||
+ SrcAlloca->getParent() != Store->getParent())
+ return false;
+
+ // 2-2. Check that src and dest are never captured, unescaped allocas. Also
+ // collect lifetime markers and the first/last users in order to shrink wrap
+ // the lifetimes, and instructions with noalias metadata so it can be removed.
+
+ SmallVector<Instruction *, 4> LifetimeMarkers;
+ Instruction *FirstUser = nullptr, *LastUser = nullptr;
+ SmallSet<Instruction *, 4> NoAliasInstrs;
+
+ // Recursively track the users and check whether any modifying alias exists.
+ auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
+ bool CanBeNull, CanBeFreed;
+ return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
+ };
+
+ auto CaptureTrackingWithModRef =
+ [&](Instruction *AI,
+ function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(AI);
+ unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
+ Worklist.reserve(MaxUsesToExplore);
+ SmallSet<const Use *, 20> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ for (const Use &U : I->uses()) {
+ if (Visited.size() >= MaxUsesToExplore) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
+ return false;
+ }
+ if (!Visited.insert(&U).second)
+ continue;
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::MAY_CAPTURE:
+ return false;
+ case UseCaptureKind::PASSTHROUGH:
+ // Instructions cannot have non-instruction users.
+ Worklist.push_back(cast<Instruction>(U.getUser()));
+ continue;
+ case UseCaptureKind::NO_CAPTURE: {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (DestAlloca->getParent() != UI->getParent())
+ return false;
+ if (!FirstUser || UI->comesBefore(FirstUser))
+ FirstUser = UI;
+ if (!LastUser || LastUser->comesBefore(UI))
+ LastUser = UI;
+ if (UI->hasMetadata(LLVMContext::MD_noalias))
+ NoAliasInstrs.insert(UI);
+ if (UI->isLifetimeStartOrEnd()) {
+ // We note the locations of these intrinsic calls so that we can
+ // delete them later if the optimization succeeds, this is safe
+ // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
+ // conceptually fill all the bytes of the alloca with an undefined
+ // value.
+ int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
+ if (Size < 0 || Size == DestSize) {
+ LifetimeMarkers.push_back(UI);
+ continue;
+ }
+ }
+ if (!ModRefCallback(UI))
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ };
+
+ // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
+ // from the alloca to the Store.
+ ModRefInfo DestModRef = ModRefInfo::NoModRef;
+ MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
+ auto DestModRefCallback = [&](Instruction *UI) -> bool {
+ // We don't care about the store itself.
+ if (UI == Store)
+ return true;
+ ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
+ // FIXME: For multi-BB cases, we need to see reachability from it to
+ // store.
+ // Bailout if Dest may have any ModRef before Store.
+ if (UI->comesBefore(Store) && isModOrRefSet(Res))
+ return false;
+ DestModRef |= Res;
+
+ return true;
+ };
+
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ return false;
+
+ // 4. Check that, from after the Load to the end of the BB,
+ // 4-1. if the dest has any Mod, src has no Ref, and
+ // 4-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
+ MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
+
+ auto SrcModRefCallback = [&](Instruction *UI) -> bool {
+ // Any ModRef before Load doesn't matter, also Load and Store can be
+ // ignored.
+ if (UI->comesBefore(Load) || UI == Load || UI == Store)
+ return true;
+ ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
+ if ((isModSet(DestModRef) && isRefSet(Res)) ||
+ (isRefSet(DestModRef) && isModSet(Res)))
+ return false;
+
+ return true;
+ };
+
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ return false;
+
+ // We can do the transformation. First, align the allocas appropriately.
+ SrcAlloca->setAlignment(
+ std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
+
+ // Merge the two allocas.
+ DestAlloca->replaceAllUsesWith(SrcAlloca);
+ eraseInstruction(DestAlloca);
+
+ // Drop metadata on the source alloca.
+ SrcAlloca->dropUnknownNonDebugMetadata();
+
+ // "Shrink wrap" the lifetimes, if the original lifetime intrinsics exist.
+ if (!LifetimeMarkers.empty()) {
+ LLVMContext &C = SrcAlloca->getContext();
+ IRBuilder<> Builder(C);
+
+ ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
+ // Create a new lifetime start marker before the first user of the src or
+ // dest alloca.
+ Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
+ Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+
+ // Create a new lifetime end marker after the last user of the src or dest
+ // alloca.
+ Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
+ Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
+
+ // Remove all other lifetime markers.
+ for (Instruction *I : LifetimeMarkers)
+ eraseInstruction(I);
+ }
+
+ // As this transformation can cause memory accesses that didn't previously
+ // alias to begin to alias one another, we remove !noalias metadata from any
+ // uses of either alloca. This is conservative, but more precision doesn't
+ // seem worthwhile right now.
+ for (Instruction *I : NoAliasInstrs)
+ I->setMetadata(LLVMContext::MD_noalias, nullptr);
+
+ LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
+ NumStackMove++;
+ return true;
+}
+
/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1464,13 +1688,14 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
AnyClobber, MemoryLocation::getForSource(M), BAA);
- // There are four possible optimizations we can do for memcpy:
+ // There are five possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// c) memcpy from freshly alloca'd space or space that has just started
// its lifetime copies undefined data, and we can therefore eliminate
// the memcpy in favor of the data that was already at the destination.
// d) memcpy from a just-memset'd source can be turned into memset.
+ // e) elimination of memcpy via stack-move optimization.
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1489,7 +1714,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep, BAA);
+ if (processMemCpyMemCpyDependence(M, MDep, BAA))
+ return true;
if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1508,6 +1734,27 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
+ // If the transfer is from a stack slot to a stack slot, then we may be able
+ // to perform the stack-move optimization. See the comments in
+ // performStackMoveOptzn() for more details.
+ auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
+ if (!DestAlloca)
+ return false;
+ auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
+ if (!SrcAlloca)
+ return false;
+ ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
+ if (Len == nullptr)
+ return false;
+ if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
+ BAA)) {
+ // Avoid invalidating the iterator.
+ BBI = M->getNextNonDebugInstruction()->getIterator();
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+
return false;
}
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
index 3a6b3c3804b574..8c769319236d65 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -56,11 +56,9 @@ define void @write_dest_between_call_and_memcpy() {
define void @write_src_between_call_and_memcpy() {
; CHECK-LABEL: @write_src_between_call_and_memcpy(
-; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT: store i8 1, ptr [[SRC]], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%dest = alloca [16 x i8]
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 9ceb6edbfd997e..6a06c988bfa252 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -20,19 +20,14 @@ declare i32 @use_maycapture(ptr noundef)
declare i32 @use_readonly(ptr readonly)
declare i32 @use_writeonly(ptr noundef) memory(write)
-; TODO: Merge alloca and remove memcpy.
define void @basic_memcpy() {
; CHECK-LABEL: define void @basic_memcpy() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -51,19 +46,14 @@ define void @basic_memcpy() {
ret void
}
-; TODO: Merge alloca and remove memmove.
define void @basic_memmove() {
; CHECK-LABEL: define void @basic_memmove() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -82,21 +72,15 @@ define void @basic_memmove() {
ret void
}
-; TODO: Merge alloca and remove load/store.
; Tests that the optimization succeeds with a load/store pair.
define void @load_store() {
; CHECK-LABEL: define void @load_store() {
; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4
-; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i32, align 4
@@ -115,20 +99,15 @@ define void @load_store() {
ret void
}
-; TODO: Merge alloca.
; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid.
define void @align_up() {
; CHECK-LABEL: define void @align_up() {
-; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -146,26 +125,17 @@ define void @align_up() {
ret void
}
-; TODO: Merge alloca and remove memcpy, shrinkwrap lifetimes.
; Tests that we correctly remove extra lifetime intrinsics when performing the
; optimization.
define void @remove_extra_lifetime_intrinsics() {
; CHECK-LABEL: define void @remove_extra_lifetime_intrinsics() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -188,17 +158,14 @@ define void @remove_extra_lifetime_intrinsics() {
ret void
}
-; TODO: Merge alloca and remove memcpy, without inserting lifetime markers.
; Tests that we won't insert lifetime markers if they don't exist originally.
define void @no_lifetime() {
; CHECK-LABEL: define void @no_lifetime() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -213,23 +180,17 @@ define void @no_lifetime() {
ret void
}
-
-; TODO: Merge alloca and remove memcpy.
; Tests that aliasing src or dest but no modification doesn't prevent transformations.
define void @alias_no_mod() {
; CHECK-LABEL: define void @alias_no_mod() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
-; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[DEST]], i32 0, i32 0
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
+; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: [[SRC_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -258,20 +219,15 @@ define void @alias_no_mod() {
!3 = !{!"Whatever"}
-; TODO: Merge alloca and remove memcpy, remove noalias metadata on src.
; Tests that we remove scoped noalias metadata from a call.
define void @remove_scoped_noalias() {
; CHECK-LABEL: define void @remove_scoped_noalias() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]), !noalias !0
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -289,20 +245,15 @@ define void @remove_scoped_noalias() {
ret void
}
-; TODO: Merge alloca and remove memcpy, remove noalias metadata on src.
; Tests that we remove metadata on the merged alloca.
define void @remove_alloca_metadata() {
; CHECK-LABEL: define void @remove_alloca_metadata() {
-; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4, !annotation !3
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]), !noalias !0
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4, !annotation !3
@@ -320,20 +271,15 @@ define void @remove_alloca_metadata() {
ret void
}
-; TODO: Merge alloca and remove memcpy.
; Tests that we can merge alloca if the dest and src have only refs except lifetime intrinsics.
define void @src_ref_dest_ref_after_copy() {
; CHECK-LABEL: define void @src_ref_dest_ref_after_copy() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -355,15 +301,11 @@ define void @src_ref_dest_ref_after_copy() {
define void @src_mod_dest_mod_after_copy() {
; CHECK-LABEL: define void @src_mod_dest_mod_after_copy() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -797,7 +739,6 @@ define void @src_captured() {
ret void
}
-; TODO: Prevent this transformation
; Tests that the optimization fails if any mod/ref exists before the copy.
; A ref alone seems safe, since without a mod the ref would always read undef, but we bail out to stay simple and conservative.
define void @mod_ref_before_copy() {
@@ -831,7 +772,6 @@ define void @mod_ref_before_copy() {
ret void
}
-; TODO: Prevent this transformation
; Tests that failure because copy semantics will change if dest is replaced with src.
define void @mod_dest_before_copy() {
; CHECK-LABEL: define void @mod_dest_before_copy() {
@@ -864,7 +804,6 @@ define void @mod_dest_before_copy() {
ret void
}
-; TODO: Prevent transformations
define void @mod_src_before_store_after_load() {
; CHECK-LABEL: define void @mod_src_before_store_after_load() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
More information about the llvm-commits
mailing list