[llvm] 8f3864b - Revert "Revert "Revert "[MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas"""
Kohei Asano via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 18 03:22:35 PDT 2023
Hi, Florian
> Please try to include a reason for the revert in the commit message in the future. It also helps with readability if you use `Recommit ….` instead of `Revert Revert`.
I really appreciate you pointing this out to me; it sounds very
reasonable. At the very least, I'll add explanations to future
revert/reapply commits.
Thanks. Regards
Kohei
2023年7月18日(火) 18:57 Florian Hahn <florian_hahn at apple.com>:
>
> Hi,
>
> Please try to include a reason for the revert in the commit message in the future. It also helps with readability if you use `Recommit ….` instead of `Revert Revert`.
>
> Thanks,
> Florian
>
> > On Jul 18, 2023, at 11:42, via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> >
> >
> > Author: khei4
> > Date: 2023-07-18T18:42:36+09:00
> > New Revision: 8f3864ba4323a253bcf29825d23cd325b52c4106
> >
> > URL: https://github.com/llvm/llvm-project/commit/8f3864ba4323a253bcf29825d23cd325b52c4106
> > DIFF: https://github.com/llvm/llvm-project/commit/8f3864ba4323a253bcf29825d23cd325b52c4106.diff
> >
> > LOG: Revert "Revert "Revert "[MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas"""
> >
> > This reverts commit b02d349cbfaa81c9bbc928c4de46b12d976c1882.
> >
> > Added:
> >
> >
> > Modified:
> > llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
> > llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
> > llvm/test/Transforms/MemCpyOpt/callslot.ll
> > llvm/test/Transforms/MemCpyOpt/stack-move.ll
> >
> > Removed:
> >
> >
> >
> > ################################################################################
> > diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
> > index d3e5e2591eea11..9ce64623e25b27 100644
> > --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
> > +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
> > @@ -20,7 +20,6 @@
> > namespace llvm {
> >
> > class AAResults;
> > -class AllocaInst;
> > class BatchAAResults;
> > class AssumptionCache;
> > class CallBase;
> > @@ -78,9 +77,6 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
> > Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
> > Value *ByteVal);
> > bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
> > - bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
> > - AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
> > - uint64_t Size, BatchAAResults &BAA);
> >
> > void eraseInstruction(Instruction *I);
> > bool iterateOnFunction(Function &F);
> >
> > diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
> > index 1dac4a285f20da..00937e0d734ab3 100644
> > --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
> > +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
> > @@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
> > STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
> > STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
> > STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
> > -STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
> >
> > namespace {
> >
> > @@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
> > return true;
> > }
> >
> > - // If this is a load-store pair from a stack slot to a stack slot, we
> > - // might be able to perform the stack-move optimization just as we do for
> > - // memcpys from an alloca to an alloca.
> > - if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
> > - if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
> > - if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
> > - DL.getTypeStoreSize(T), BAA)) {
> > - // Avoid invalidating the iterator.
> > - BBI = SI->getNextNonDebugInstruction()->getIterator();
> > - eraseInstruction(SI);
> > - eraseInstruction(LI);
> > - ++NumMemCpyInstr;
> > - return true;
> > - }
> > - }
> > - }
> > -
> > return false;
> > }
> >
> > @@ -1426,212 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
> > return true;
> > }
> >
> > -// Attempts to optimize the pattern whereby memory is copied from an alloca to
> > -// another alloca, where the two allocas don't have conflicting mod/ref. If
> > -// successful, the two allocas can be merged into one and the transfer can be
> > -// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
> > -// move operations in that language.
> > -//
> > -// Once we determine that the optimization is safe to perform, we replace all
> > -// uses of the destination alloca with the source alloca. We also "shrink wrap"
> > -// the lifetime markers of the single merged alloca to before the first use
> > -// and after the last use. Note that the "shrink wrapping" procedure is a safe
> > -// transformation only because we restrict the scope of this optimization to
> > -// allocas that aren't captured.
> > -bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
> > - AllocaInst *DestAlloca,
> > - AllocaInst *SrcAlloca, uint64_t Size,
> > - BatchAAResults &BAA) {
> > - LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
> > - << *Store << "\n");
> > -
> > - // Make sure the two allocas are in the same address space.
> > - if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
> > - LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
> > - return false;
> > - }
> > -
> > - // 1. Check that copy is full. Calculate the static size of the allocas to be
> > - // merged, bail out if we can't.
> > - const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
> > - std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
> > - if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
> > - LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
> > - return false;
> > - }
> > - std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
> > - if (!DestSize || DestSize->isScalable() ||
> > - Size != DestSize->getFixedValue()) {
> > - LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
> > - return false;
> > - }
> > -
> > - // 2-1. Check that src and dest are static allocas, which are not affected by
> > - // stacksave/stackrestore.
> > - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
> > - SrcAlloca->getParent() != Load->getParent() ||
> > - SrcAlloca->getParent() != Store->getParent())
> > - return false;
> > -
> > - // 2-2. Check that src and dest are never captured, unescaped allocas. Also
> > - // collect lifetime markers first/last users in order to shrink wrap the
> > - // lifetimes, and instructions with noalias metadata to remove them.
> > -
> > - SmallVector<Instruction *, 4> LifetimeMarkers;
> > - Instruction *FirstUser = nullptr, *LastUser = nullptr;
> > - SmallSet<Instruction *, 4> NoAliasInstrs;
> > -
> > - // Recursively track the user and check whether modified alias exist.
> > - auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
> > - bool CanBeNull, CanBeFreed;
> > - return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
> > - };
> > -
> > - auto CaptureTrackingWithModRef =
> > - [&](Instruction *AI,
> > - function_ref<bool(Instruction *)> ModRefCallback) -> bool {
> > - SmallVector<Instruction *, 8> Worklist;
> > - Worklist.push_back(AI);
> > - unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
> > - Worklist.reserve(MaxUsesToExplore);
> > - SmallSet<const Use *, 20> Visited;
> > - while (!Worklist.empty()) {
> > - Instruction *I = Worklist.back();
> > - Worklist.pop_back();
> > - for (const Use &U : I->uses()) {
> > - if (Visited.size() >= MaxUsesToExplore) {
> > - LLVM_DEBUG(
> > - dbgs()
> > - << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
> > - return false;
> > - }
> > - if (!Visited.insert(&U).second)
> > - continue;
> > - switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
> > - case UseCaptureKind::MAY_CAPTURE:
> > - return false;
> > - case UseCaptureKind::PASSTHROUGH:
> > - // Instructions cannot have non-instruction users.
> > - Worklist.push_back(cast<Instruction>(U.getUser()));
> > - continue;
> > - case UseCaptureKind::NO_CAPTURE: {
> > - auto *UI = cast<Instruction>(U.getUser());
> > - if (DestAlloca->getParent() != UI->getParent())
> > - return false;
> > - if (!FirstUser || UI->comesBefore(FirstUser))
> > - FirstUser = UI;
> > - if (!LastUser || LastUser->comesBefore(UI))
> > - LastUser = UI;
> > - if (UI->hasMetadata(LLVMContext::MD_noalias))
> > - NoAliasInstrs.insert(UI);
> > - if (UI->isLifetimeStartOrEnd()) {
> > - // We note the locations of these intrinsic calls so that we can
> > - // delete them later if the optimization succeeds, this is safe
> > - // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
> > - // conceptually fill all the bytes of the alloca with an undefined
> > - // value.
> > - int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
> > - if (Size < 0 || Size == DestSize) {
> > - LifetimeMarkers.push_back(UI);
> > - continue;
> > - }
> > - }
> > - if (!ModRefCallback(UI))
> > - return false;
> > - }
> > - }
> > - }
> > - }
> > - return true;
> > - };
> > -
> > - // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
> > - // from the alloca to the Store.
> > - ModRefInfo DestModRef = ModRefInfo::NoModRef;
> > - MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
> > - auto DestModRefCallback = [&](Instruction *UI) -> bool {
> > - // We don't care about the store itself.
> > - if (UI == Store)
> > - return true;
> > - ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
> > - // FIXME: For multi-BB cases, we need to see reachability from it to
> > - // store.
> > - // Bailout if Dest may have any ModRef before Store.
> > - if (UI->comesBefore(Store) && isModOrRefSet(Res))
> > - return false;
> > - DestModRef |= BAA.getModRefInfo(UI, DestLoc);
> > -
> > - return true;
> > - };
> > -
> > - if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
> > - return false;
> > -
> > - // 3. Check that, from after the Load to the end of the BB,
> > - // 3-1. if the dest has any Mod, src has no Ref, and
> > - // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
> > - MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
> > -
> > - auto SrcModRefCallback = [&](Instruction *UI) -> bool {
> > - // Any ModRef before Load doesn't matter, also Load and Store can be
> > - // ignored.
> > - if (UI->comesBefore(Load) || UI == Load || UI == Store)
> > - return true;
> > - ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
> > - if ((isModSet(DestModRef) && isRefSet(Res)) ||
> > - (isRefSet(DestModRef) && isModSet(Res)))
> > - return false;
> > -
> > - return true;
> > - };
> > -
> > - if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
> > - return false;
> > -
> > - // We can do the transformation. First, align the allocas appropriately.
> > - SrcAlloca->setAlignment(
> > - std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
> > -
> > - // Merge the two allocas.
> > - DestAlloca->replaceAllUsesWith(SrcAlloca);
> > - eraseInstruction(DestAlloca);
> > -
> > - // Drop metadata on the source alloca.
> > - SrcAlloca->dropUnknownNonDebugMetadata();
> > -
> > - // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists.
> > - if (!LifetimeMarkers.empty()) {
> > - LLVMContext &C = SrcAlloca->getContext();
> > - IRBuilder<> Builder(C);
> > -
> > - ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
> > - // Create a new lifetime start marker before the first user of src or alloca
> > - // users.
> > - Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
> > - Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
> > -
> > - // Create a new lifetime end marker after the last user of src or alloca
> > - // users.
> > - Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
> > - Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
> > -
> > - // Remove all other lifetime markers.
> > - for (Instruction *I : LifetimeMarkers)
> > - eraseInstruction(I);
> > - }
> > -
> > - // As this transformation can cause memory accesses that didn't previously
> > - // alias to begin to alias one another, we remove !noalias metadata from any
> > - // uses of either alloca. This is conservative, but more precision doesn't
> > - // seem worthwhile right now.
> > - for (Instruction *I : NoAliasInstrs)
> > - I->setMetadata(LLVMContext::MD_noalias, nullptr);
> > -
> > - LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
> > - NumStackMove++;
> > - return true;
> > -}
> > -
> > /// Perform simplification of memcpy's. If we have memcpy A
> > /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
> > /// B to be a memcpy from X to Z (or potentially a memmove, depending on
> > @@ -1688,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
> > MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
> > AnyClobber, MemoryLocation::getForSource(M), BAA);
> >
> > - // There are five possible optimizations we can do for memcpy:
> > + // There are four possible optimizations we can do for memcpy:
> > // a) memcpy-memcpy xform which exposes redundance for DSE.
> > // b) call-memcpy xform for return slot optimization.
> > // c) memcpy from freshly alloca'd space or space that has just started
> > // its lifetime copies undefined data, and we can therefore eliminate
> > // the memcpy in favor of the data that was already at the destination.
> > // d) memcpy from a just-memset'd source can be turned into memset.
> > - // e) elimination of memcpy via stack-move optimization.
> > if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
> > if (Instruction *MI = MD->getMemoryInst()) {
> > if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
> > @@ -1714,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
> > }
> > }
> > if (auto *MDep = dyn_cast<MemCpyInst>(MI))
> > - if (processMemCpyMemCpyDependence(M, MDep, BAA))
> > - return true;
> > + return processMemCpyMemCpyDependence(M, MDep, BAA);
> > if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
> > if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
> > LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
> > @@ -1734,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
> > }
> > }
> >
> > - // If the transfer is from a stack slot to a stack slot, then we may be able
> > - // to perform the stack-move optimization. See the comments in
> > - // performStackMoveOptzn() for more details.
> > - auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
> > - if (!DestAlloca)
> > - return false;
> > - auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
> > - if (!SrcAlloca)
> > - return false;
> > - ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
> > - if (Len == nullptr)
> > - return false;
> > - if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
> > - BAA)) {
> > - // Avoid invalidating the iterator.
> > - BBI = M->getNextNonDebugInstruction()->getIterator();
> > - eraseInstruction(M);
> > - ++NumMemCpyInstr;
> > - return true;
> > - }
> > -
> > return false;
> > }
> >
> >
> > diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
> > index 8c769319236d65..3a6b3c3804b574 100644
> > --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
> > +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
> > @@ -56,9 +56,11 @@ define void @write_dest_between_call_and_memcpy() {
> >
> > define void @write_src_between_call_and_memcpy() {
> > ; CHECK-LABEL: @write_src_between_call_and_memcpy(
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
> > ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
> > ; CHECK-NEXT: store i8 1, ptr [[SRC]], align 1
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 16, i1 false)
> > ; CHECK-NEXT: ret void
> > ;
> > %dest = alloca [16 x i8]
> >
> > diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
> > index 6a06c988bfa252..9ceb6edbfd997e 100644
> > --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
> > +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
> > @@ -20,14 +20,19 @@ declare i32 @use_maycapture(ptr noundef)
> > declare i32 @use_readonly(ptr readonly)
> > declare i32 @use_writeonly(ptr noundef) memory(write)
> >
> > +; TODO: Merge alloca and remove memcpy.
> > define void @basic_memcpy() {
> > ; CHECK-LABEL: define void @basic_memcpy() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -46,14 +51,19 @@ define void @basic_memcpy() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca and remove memmove.
> > define void @basic_memmove() {
> > ; CHECK-LABEL: define void @basic_memmove() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -72,15 +82,21 @@ define void @basic_memmove() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca and remove load/store.
> > ; Tests that the optimization succeeds with a load/store pair.
> > define void @load_store() {
> > ; CHECK-LABEL: define void @load_store() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]])
> > +; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4
> > +; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca i32, align 4
> > @@ -99,15 +115,20 @@ define void @load_store() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca.
> > ; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid.
> > define void @align_up() {
> > ; CHECK-LABEL: define void @align_up() {
> > -; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -125,17 +146,26 @@ define void @align_up() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca and remove memcpy, shrinkwrap lifetimes.
> > ; Tests that we correctly remove extra lifetime intrinsics when performing the
> > ; optimization.
> > define void @remove_extra_lifetime_intrinsics() {
> > ; CHECK-LABEL: define void @remove_extra_lifetime_intrinsics() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -158,14 +188,17 @@ define void @remove_extra_lifetime_intrinsics() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca and remove memcpy, without inserting lifetime markers.
> > ; Tests that we won't insert lifetime markers if they don't exist originally.
> > define void @no_lifetime() {
> > ; CHECK-LABEL: define void @no_lifetime() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -180,17 +213,23 @@ define void @no_lifetime() {
> > ret void
> > }
> >
> > +
> > +; TODO: Merge alloca and remove memcpy.
> > ; Tests that aliasing src or dest but no modification desn't prevent transformations.
> > define void @alias_no_mod() {
> > ; CHECK-LABEL: define void @alias_no_mod() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > -; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > +; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[DEST]], i32 0, i32 0
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > ; CHECK-NEXT: [[SRC_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -219,15 +258,20 @@ define void @alias_no_mod() {
> >
> > !3 = !{!"Whatever"}
> >
> > +; TODO: Merge alloca and remove memcpy, remove noalias metadata on src.
> > ; Tests that we remove scoped noalias metadata from a call.
> > define void @remove_scoped_noalias() {
> > ; CHECK-LABEL: define void @remove_scoped_noalias() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]), !noalias !0
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -245,15 +289,20 @@ define void @remove_scoped_noalias() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca and remove memcpy, remove noalias metadata on src.
> > ; Tests that we remove metadata on the merged alloca.
> > define void @remove_alloca_metadata() {
> > ; CHECK-LABEL: define void @remove_alloca_metadata() {
> > -; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4, !annotation !3
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]), !noalias !0
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4, !annotation !3
> > @@ -271,15 +320,20 @@ define void @remove_alloca_metadata() {
> > ret void
> > }
> >
> > +; TODO: Merge alloca and remove memcpy.
> > ; Tests that we can merge alloca if the dest and src has only refs except lifetime intrinsics.
> > define void @src_ref_dest_ref_after_copy() {
> > ; CHECK-LABEL: define void @src_ref_dest_ref_after_copy() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -301,11 +355,15 @@ define void @src_ref_dest_ref_after_copy() {
> > define void @src_mod_dest_mod_after_copy() {
> > ; CHECK-LABEL: define void @src_mod_dest_mod_after_copy() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> > -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
> > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
> > ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]])
> > -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
> > +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[DEST]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
> > +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
> > ; CHECK-NEXT: ret void
> > ;
> > %src = alloca %struct.Foo, align 4
> > @@ -739,6 +797,7 @@ define void @src_captured() {
> > ret void
> > }
> >
> > +; TODO: Prevent this transformation
> > ; Tests that failure if any modref exists before the copy,
> > ; Exactly ref seems safe because no mod say ref would be always undefined, but to make simple and conservative.
> > define void @mod_ref_before_copy() {
> > @@ -772,6 +831,7 @@ define void @mod_ref_before_copy() {
> > ret void
> > }
> >
> > +; TODO: Prevent this transformation
> > ; Tests that failure because copy semantics will change if dest is replaced with src.
> > define void @mod_dest_before_copy() {
> > ; CHECK-LABEL: define void @mod_dest_before_copy() {
> > @@ -804,6 +864,7 @@ define void @mod_dest_before_copy() {
> > ret void
> > }
> >
> > +; TODO: Prevent transformations
> > define void @mod_src_before_store_after_load() {
> > ; CHECK-LABEL: define void @mod_src_before_store_after_load() {
> > ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
> >
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at lists.llvm.org
> > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list