[llvm] 1d6ab18 - [MemCpyOpt] Drop dead `memmove` calls on `memset`'d source data
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 00:54:37 PST 2024
Author: Antonio Frighetto
Date: 2024-12-03T09:50:57+01:00
New Revision: 1d6ab189be031bf723abf35f772fbd5d4c86c612
URL: https://github.com/llvm/llvm-project/commit/1d6ab189be031bf723abf35f772fbd5d4c86c612
DIFF: https://github.com/llvm/llvm-project/commit/1d6ab189be031bf723abf35f772fbd5d4c86c612.diff
LOG: [MemCpyOpt] Drop dead `memmove` calls on `memset`'d source data
When a memmove happens to clobber its own source data, and that data has
previously been memset'd, the memmove may be redundant.
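As a minimal LLVM IR sketch of the pattern (modelled on the first case in the
updated test file below; names are illustrative), the memmove only shuffles
bytes that the preceding memset already set to the same value, so it can be
dropped:

declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
declare void @llvm.memmove.p0.p0.i64(ptr, ptr, i64, i1)

define i32 @redundant_memmove_sketch() {
  %array = alloca [26 x i32], align 16
  ; Every byte of the 104-byte buffer is set to 0.
  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
  ; memmove(%array, %array + 4, 100): both the 100 source bytes and the
  ; destination bytes lie inside the memset'd region, so the move changes
  ; nothing and MemCpyOpt can now delete it.
  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
  %val = load i32, ptr %array, align 16
  ret i32 %val
}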
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 023c9de28209c8..496d2958fc2d0f 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -68,7 +68,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
BasicBlock::iterator &BBI);
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
- bool processMemMove(MemMoveInst *M);
+ bool processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
Align cpyAlign, BatchAAResults &BAA,
@@ -87,6 +87,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
TypeSize Size, BatchAAResults &BAA);
+ bool isMemMoveMemSetDependency(MemMoveInst *M);
void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e9e1071ea210c4..0cba5d077da62b 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -68,6 +68,7 @@ static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
@@ -1841,12 +1842,75 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
return false;
}
+/// Memmove calls with overlapping src/dest buffers that come after a memset may
+/// be removed.
+bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
+ const auto &DL = M->getDataLayout();
+ MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
+ if (!MemMoveAccess)
+ return false;
+
+ // The memmove is of form memmove(x, x + A, B).
+ MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
+ auto *MemMoveSourceOp = M->getSource();
+ auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
+ if (!Source)
+ return false;
+
+ APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
+ LocationSize MemMoveLocSize = SourceLoc.Size;
+ if (Source->getPointerOperand() != M->getDest() ||
+ !MemMoveLocSize.hasValue() ||
+ !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
+ return false;
+ }
+
+ uint64_t MemMoveSize = MemMoveLocSize.getValue();
+ LocationSize TotalSize =
+ LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
+ MemoryLocation CombinedLoc(M->getDest(), TotalSize);
+
+ // The first dominating clobbering MemoryAccess for the combined location
+ // needs to be a memset.
+ BatchAAResults BAA(*AA);
+ MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
+ auto *DestClobber = dyn_cast<MemoryDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
+ if (!DestClobber)
+ return false;
+
+ auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
+ if (!MS)
+ return false;
+
+ // Memset length must be sufficiently large.
+ auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
+ if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
+ return false;
+
+ // The destination buffer must have been memset'd.
+ if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
+ return false;
+
+ return true;
+}
+
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
-bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
+bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
// See if the source could be modified by this memmove potentially.
- if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
+ // On the off-chance the memmove clobbers src with previously memset'd
+ // bytes, the memmove may be redundant.
+ if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
+ LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
+ ++BBI;
+ eraseInstruction(M);
+ ++NumMemMoveInstr;
+ return true;
+ }
return false;
+ }
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
<< "\n");
@@ -2064,7 +2128,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
else if (auto *M = dyn_cast<MemCpyInst>(I))
RepeatInstruction = processMemCpy(M, BI);
else if (auto *M = dyn_cast<MemMoveInst>(I))
- RepeatInstruction = processMemMove(M);
+ RepeatInstruction = processMemMove(M, BI);
else if (auto *CB = dyn_cast<CallBase>(I)) {
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
if (CB->isByValArgument(i))
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
index 4b09a2057b4c31..c7593e2941518a 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -7,7 +7,6 @@ define i32 @redundant_memmove() {
; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
; CHECK-NEXT: ret i32 [[VAL]]
;
@@ -88,7 +87,6 @@ define i32 @redundant_memmove_different_bbs() {
; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
; CHECK-NEXT: br label [[USE:%.*]]
; CHECK: use:
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
; CHECK-NEXT: ret i32 [[VAL]]
;
@@ -110,7 +108,6 @@ use: ; preds = %entry
define ptr @redundant_memmove_memset_global_variable() {
; CHECK-LABEL: @redundant_memmove_memset_global_variable(
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false)
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false)
; CHECK-NEXT: ret ptr @g_var
;
call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false)
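For contrast, a hedged sketch of a case the new isMemMoveMemSetDependency
check should reject (illustrative only, not part of the committed tests): the
memset covers fewer bytes than the memmove reads, so the memset-length guard
above leaves the memmove in place:

declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
declare void @llvm.memmove.p0.p0.i64(ptr, ptr, i64, i1)

define i32 @not_redundant_short_memset() {
  %array = alloca [26 x i32], align 16
  ; Only the first 50 bytes are zeroed...
  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 50, i1 false)
  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
  ; ...but the memmove reads 100 bytes starting at offset 4, so part of the
  ; source is not known to hold the memset value and the call must stay.
  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
  %val = load i32, ptr %array, align 16
  ret i32 %val
}

Running opt -passes=memcpyopt -S over the two sketches should drop only the
first memmove; in builds with statistics enabled, the deletion shows up under
the new NumMemMoveInstr counter.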