[llvm] [memcpyopt] fix incorrect handling of lifetime markers (PR #143782)
Jameson Nash via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 13:54:44 PDT 2025
https://github.com/vtjnash updated https://github.com/llvm/llvm-project/pull/143782
>From a2f0414a052397ac6a85afc6be3c76ec9ba626f7 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 11 Jun 2025 20:20:55 +0000
Subject: [PATCH] [memcpyopt] fix incorrect handling of lifetime markers
Having lifetime markers should only increase the information available
to LLVM, but it was stopping the MemSSA walk and unable to see that
these were extra.
Secondly, it would ignore a lifetime marker that wasn't full size
(relying on the callback to forbid the optimization entirely), but again
sub-optimal lifetime markers are not supposed to be forbidding
optimizations that would otherwise apply if they were either absent or
optimal. This pass wasn't tracking GEP offsets either, so it wasn't
quite correctly handled either (although earlier sub-optimal checks that
this size is the same as the alloca test made this safe in the past).
Lastly, the stack-move test seemed to have been providing lifetime in
bits instead of bytes, although now we optimize it regardless of that,
so we didn't really have to fix that.
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 110 ++++++++++--------
.../ScopedNoAliasAA/alias-scope-merging.ll | 20 ++--
.../Transforms/MemCpyOpt/callslot_badaa.ll | 18 ++-
.../test/Transforms/MemCpyOpt/memcpy-undef.ll | 2 -
llvm/test/Transforms/MemCpyOpt/stack-move.ll | 15 +--
5 files changed, 90 insertions(+), 75 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 960001bf880c6..877600034f84e 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1366,56 +1366,68 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
/// Determine whether the pointer V had only undefined content (due to Def) up
/// to the given Size, either because it was freshly alloca'd or started its
-/// lifetime.
-static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
- MemoryDef *Def, Value *Size) {
- if (MSSA->isLiveOnEntryDef(Def))
- return isa<AllocaInst>(getUnderlyingObject(V));
-
- if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
- auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
-
- if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
- if (AA.isMustAlias(V, II->getArgOperand(1)) &&
- LTSize->getZExtValue() >= CSize->getZExtValue())
- return true;
- }
+/// lifetime by walking the MSSA graph.
+static bool hadUndefContentsBefore(MemorySSA *MSSA, BatchAAResults &BAA,
+ Value *V, MemoryAccess *Clobber,
+ MemoryLocation Loc, Value *Size) {
+ while (1) {
+ Clobber = MSSA->getWalker()->getClobberingMemoryAccess(Clobber, Loc, BAA);
+ MemoryDef *Def = dyn_cast<MemoryDef>(Clobber);
+ if (!Def)
+ return false;
+
+ if (MSSA->isLiveOnEntryDef(Def))
+ return isa<AllocaInst>(getUnderlyingObject(V));
+
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
- // If the lifetime.start covers a whole alloca (as it almost always
- // does) and we're querying a pointer based on that alloca, then we know
- // the memory is definitely undef, regardless of how exactly we alias.
- // The size also doesn't matter, as an out-of-bounds access would be UB.
- if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
- if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
- const DataLayout &DL = Alloca->getDataLayout();
- if (std::optional<TypeSize> AllocaSize =
- Alloca->getAllocationSize(DL))
- if (*AllocaSize == LTSize->getValue())
+ if (Size)
+ if (auto CSize = dyn_cast<ConstantInt>(Size))
+ if (BAA.isMustAlias(V, II->getArgOperand(1)) &&
+ LTSize->getZExtValue() >= CSize->getZExtValue())
return true;
+
+ // If the lifetime.start covers a whole alloca (as it almost always
+ // does) and we're querying a pointer based on that alloca, then we know
+ // the memory is definitely undef, regardless of how exactly we alias.
+ // The size also doesn't matter, as an out-of-bounds access would be UB.
+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
+ if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
+ const DataLayout &DL = Alloca->getDataLayout();
+ if (std::optional<TypeSize> AllocaSize =
+ Alloca->getAllocationSize(DL))
+ if (*AllocaSize == LTSize->getValue())
+ return true;
+ }
}
+ Clobber = Def->getDefiningAccess();
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ Clobber = Def->getDefiningAccess();
+ continue;
}
}
- }
- return false;
+ return false;
+ }
}
// If the memcpy is larger than the previous, but the memory was undef prior to
// that, we can just ignore the tail. Technically we're only interested in the
// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
-// as we can't easily represent this location (hasUndefContents uses mustAlias
-// which cannot deal with offsets), we use the full 0..CopySize range.
+// as we can't easily represent this location (hadUndefContentsBefore uses
+// mustAlias which cannot deal with offsets), we use the full 0..CopySize range.
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
MemIntrinsic *MemSrc, BatchAAResults &BAA) {
Value *CopySize = MemCpy->getLength();
- MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
- MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
- return true;
+ MemoryLocation LoadLoc = MemoryLocation::getForSource(MemCpy);
+ MemoryAccess *MemSrcAccess =
+ MSSA->getMemoryAccess(MemSrc)->getDefiningAccess();
+ if (hadUndefContentsBefore(MSSA, BAA, MemCpy->getSource(), MemSrcAccess,
+ LoadLoc, CopySize))
+ return true;
return false;
}
@@ -1573,11 +1585,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
// practically fill all the bytes of the alloca with an undefined
// value, although conceptually marked as alive/dead.
- int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
- if (Size < 0 || Size == DestSize) {
- LifetimeMarkers.push_back(UI);
- continue;
- }
+ // We don't currently track GEP offsets and sizes, so we don't have
+ // a way to check whether this lifetime marker affects the relevant
+ // memory regions.
+ // While we only really need to delete lifetime.end from Src and
+ // lifetime.begin from Dst, those are often implied by the memcpy
+ // anyways so hopefully not much is lost by removing all of them.
+ LifetimeMarkers.push_back(UI);
+ continue;
}
AAMetadataInstrs.insert(UI);
@@ -1594,9 +1609,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
- // size lifetime intrinsics. And collect modref inst for the reachability
- // check.
+ // Check that dest has no Mod/Ref, from the alloca to the Store. And collect
+ // modref inst for the reachability check.
ModRefInfo DestModRef = ModRefInfo::NoModRef;
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
SmallVector<BasicBlock *, 8> ReachabilityWorklist;
@@ -1779,8 +1793,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (processMemSetMemCpyDependence(M, MDep, BAA))
return true;
- MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
- AnyClobber, MemoryLocation::getForSource(M), BAA);
+ MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
+ MemoryAccess *SrcClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, SrcLoc, BAA);
// There are five possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -1820,7 +1835,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
- if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
+ if (hadUndefContentsBefore(MSSA, BAA, M->getSource(), AnyClobber, SrcLoc,
+ M->getLength())) {
LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
eraseInstruction(M);
++NumMemCpyInstr;
diff --git a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
index 989049ab67a0b..3b08e4404dde2 100644
--- a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
+++ b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
@@ -1,25 +1,27 @@
-; RUN: opt < %s -S -passes=memcpyopt | FileCheck --match-full-lines %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=memcpyopt | FileCheck %s
; Alias scopes are merged by taking the intersection of domains, then the union of the scopes within those domains
define i8 @test(i8 %input) {
+; CHECK-LABEL: define i8 @test(
+; CHECK-SAME: i8 [[INPUT:%.*]]) {
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, align 1
+; CHECK-NEXT: store i8 [[INPUT]], ptr [[SRC]], align 1
+; CHECK-NEXT: [[RET_VALUE:%.*]] = load i8, ptr [[SRC]], align 1
+; CHECK-NEXT: ret i8 [[RET_VALUE]]
+;
%tmp = alloca i8
%dst = alloca i8
%src = alloca i8
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]]
- call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src), !noalias !4
+ call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %src), !noalias !4
store i8 %input, ptr %src
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false), !alias.scope !0
- call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src), !noalias !4
+ call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %src), !noalias !4
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 1, i1 false), !alias.scope !4
%ret_value = load i8, ptr %dst
ret i8 %ret_value
}
-; Merged scope contains "callee0: %a" and "callee0 : %b"
-; CHECK-DAG: ![[CALLEE0_A:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %a"}
-; CHECK-DAG: ![[CALLEE0_B:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %b"}
-; CHECK-DAG: ![[SCOPE]] = !{![[CALLEE0_A]], ![[CALLEE0_B]]}
-
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll b/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
index efdbdce401b76..fb5d675402eba 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -S -passes=memcpyopt | FileCheck --match-full-lines %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=memcpyopt | FileCheck %s
; Make sure callslot optimization merges alias.scope metadata correctly when it merges instructions.
; Merging here naively generates:
; call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false), !alias.scope !3
-; call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src), !noalias !0
+; call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %src), !noalias !0
; ...
; !0 = !{!1}
; !1 = distinct !{!1, !2, !"callee1: %a"}
@@ -13,15 +14,20 @@
; !5 = distinct !{!5, !"callee0"}
; Which is incorrect because the lifetime.end of %src will now "noalias" the above memcpy.
define i8 @test(i8 %input) {
+; CHECK-LABEL: define i8 @test(
+; CHECK-SAME: i8 [[INPUT:%.*]]) {
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, align 1
+; CHECK-NEXT: store i8 [[INPUT]], ptr [[SRC]], align 1
+; CHECK-NEXT: [[RET_VALUE:%.*]] = load i8, ptr [[SRC]], align 1
+; CHECK-NEXT: ret i8 [[RET_VALUE]]
+;
%tmp = alloca i8
%dst = alloca i8
%src = alloca i8
-; NOTE: we're matching the full line and looking for the lack of !alias.scope here
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false)
- call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src), !noalias !3
+ call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %src), !noalias !3
store i8 %input, ptr %src
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false), !alias.scope !0
- call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src), !noalias !3
+ call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %src), !noalias !3
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 1, i1 false), !alias.scope !3
%ret_value = load i8, ptr %dst
ret i8 %ret_value
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
index 2f1ce37ea2256..a463fed3ac58c 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
@@ -96,7 +96,6 @@ define void @test_lifetime_partial_alias_3(ptr noalias %dst) {
; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[A]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[GEP]], i64 4, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca [16 x i8]
@@ -112,7 +111,6 @@ define void @test_lifetime_partial_alias_4(ptr noalias %dst) {
; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[A]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[GEP]], i64 8, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca [16 x i8]
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 4a75c5eea2499..31e255b83eb9e 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -1649,20 +1649,13 @@ loop_exit:
ret void
}
-; Tests that failure because partial-sized lifetimes are counted as mod.
+; Tests that partial-sized lifetimes are not inhibiting the optimizer
define void @partial_lifetime() {
; CHECK-LABEL: define void @partial_lifetime() {
-; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr captures(none) [[DEST]])
-; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 3, ptr captures(none) [[SRC]])
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[DEST]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
More information about the llvm-commits
mailing list