[llvm] [memcpyopt] allow some undef contents overread in processMemCpyMemCpyDependence (PR #143745)
Jameson Nash via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 17:10:53 PDT 2025
https://github.com/vtjnash updated https://github.com/llvm/llvm-project/pull/143745
>From 244e3c906f2c57a1a3dd3c8fe0459e1bd045b233 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 11 Jun 2025 15:32:25 +0000
Subject: [PATCH] [memcpyopt] allow some undef contents overread in
processMemCpyMemCpyDependence
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 53 +++++++++++++------
.../MemCpyOpt/memcpy-memcpy-offset.ll | 31 ++++++++---
.../MemCpyOpt/variable-sized-memcpy-memcpy.ll | 37 ++++++++++++-
3 files changed, 97 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1c4ec6aa08b43..9ab8d645e5911 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -107,6 +107,9 @@ struct MemsetRange {
} // end anonymous namespace
+static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
+ MemIntrinsic *MemSrc, BatchAAResults &BAA);
+
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End - Start >= 16)
@@ -1129,14 +1132,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MForwardOffset = *Offset;
}
- // The length of the memcpy's must be the same, or the preceding one
- // must be larger than the following one.
- if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
+ Value *CopyLength = M->getLength();
+
+ // The lengths of the memcpys must be the same, or the preceding one must be
+ // larger than the following one, or the contents of the overread must be
+ // undefined bytes of a defined size.
+ if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
- auto *MLen = dyn_cast<ConstantInt>(M->getLength());
- if (!MDepLen || !MLen ||
- MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
- return false;
+ auto *MLen = dyn_cast<ConstantInt>(CopyLength);
+ if (!MDepLen || !MLen)
+ return false; // This could be converted to a runtime test (%CopyLength =
+ // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
+ // unclear if that is useful
+ if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
+ if (!overreadUndefContents(MSSA, M, MDep, BAA))
+ return false;
+ if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
+ return false; // Should not reach here (there is obviously no aliasing
+ // with MDep), so just bail in case it had incomplete info
+ // somehow
+ CopyLength = ConstantInt::get(CopyLength->getType(),
+ MDepLen->getZExtValue() - MForwardOffset);
+ }
}
IRBuilder<> Builder(M);
@@ -1152,9 +1169,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
eraseInstruction(NewCopySource);
});
MaybeAlign CopySourceAlign = MDep->getSourceAlign();
- // We just need to calculate the actual size of the copy.
- auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
- MemoryLocation::getForSource(M).Size);
+ auto MCopyLoc = MemoryLocation::getForSource(MDep);
+ // Truncate the size of the MDep access to just the bytes read
+ if (MDep->getLength() != CopyLength) {
+ auto ConstLength = cast<ConstantInt>(CopyLength);
+ MCopyLoc = MCopyLoc.getWithNewSize(
+ LocationSize::precise(ConstLength->getZExtValue()));
+ }
// When the forwarding offset is greater than 0, we transform
// memcpy(d1 <- s1)
@@ -1223,20 +1244,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// example we could be moving from movaps -> movq on x86.
Instruction *NewM;
if (UseMemMove)
- NewM =
- Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
- CopySourceAlign, M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
+ CopySourceAlign, CopyLength, M->isVolatile());
else if (M->isForceInlined())
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
// never allowed since that would allow the latter to be lowered as a call
// to an external function.
NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
- CopySource, CopySourceAlign,
- M->getLength(), M->isVolatile());
+ CopySource, CopySourceAlign, CopyLength,
+ M->isVolatile());
else
NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
- CopySourceAlign, M->getLength(),
- M->isVolatile());
+ CopySourceAlign, CopyLength, M->isVolatile());
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
index da654438d7bd6..c5406c5c64340 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
@@ -135,13 +135,14 @@ define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
}
; The second `memcpy` reads past the end of the `memcpy` it depends on, but the
; overread bytes are undef, so it can still be forwarded with a shrunk length.
-define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
-; CHECK-LABEL: define void @do_not_forward_oversize_offset(
+define void @forward_oversize_offset(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_oversize_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
-; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
-; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
+; CHECK-NEXT: [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT: ret void
;
%cpy_tmp = alloca %buf, align 1
@@ -214,6 +215,24 @@ define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
ret void
}
+define void @over_offset_cpy(ptr %src) {
+; CHECK-LABEL: define void @over_offset_cpy(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 2, align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 1, align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
+; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 2
+ %dst = alloca i8, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
+ %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)
+
+ ret void
+}
+
declare void @use(ptr)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
index 4f6b734ec057d..95402a8ea686c 100644
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
@@ -18,7 +18,42 @@ define void @test(ptr %src, i64 %size) {
ret void
}
-; Differing sizes, so left as it is.
+define void @dynalloca_test(ptr %src, i64 %size1) {
+; CHECK-LABEL: @dynalloca_test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 31, i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 %size1
+ %dst = alloca i8, i64 %size1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 32, i1 false)
+
+ ret void
+}
+
+define void @dynalloca_offset_test(ptr %src, i64 %size1) {
+; CHECK-LABEL: @dynalloca_offset_test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
+; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 1 [[TMP1]], i64 30, i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 %size1
+ %dst = alloca i8, i64 %size1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
+ %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 31, i1 false)
+
+ ret void
+}
+
+; Dynamic sizes, so left as it is.
define void @negative_test(ptr %src, i64 %size1, i64 %size2) {
; CHECK-LABEL: @negative_test(
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
More information about the llvm-commits
mailing list