[llvm] [MemCpyOpt] Drop dead `memmove` calls on `memset`'d source data (PR #101930)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 27 10:41:11 PST 2024
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/101930
From 2c338b2ef1c7c67edce917b491309452dbfc788c Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 20 Sep 2024 17:41:06 +0200
Subject: [PATCH 1/3] [MemCpyOpt] Introduce test for PR101930 (NFC)
---
.../memset-memmove-redundant-memmove.ll | 109 ++++++++++++++++++
1 file changed, 109 insertions(+)
create mode 100644 llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
new file mode 100644
index 00000000000000..5b323f119f0950
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s
+
+; Redundant memmove.
+define i32 @redundant_memmove() {
+; CHECK-LABEL: @redundant_memmove(
+; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %array = alloca [26 x i32], align 16
+ call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+ %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+ call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+ %val = load i32, ptr %array, align 16
+ ret i32 %val
+}
+
+; Used memmove, buffer is reset to zero.
+define i32 @used_memmove_1() {
+; CHECK-LABEL: @used_memmove_1(
+; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT: store i32 1, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %array = alloca [26 x i32], align 16
+ call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+ %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+ store i32 1, ptr %array.idx
+ call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+ %val = load i32, ptr %array.idx, align 4
+ ret i32 %val
+}
+
+; Used memmove.
+define i32 @used_memmove_2() {
+; CHECK-LABEL: @used_memmove_2(
+; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT: store i32 1, ptr [[ARRAY]], align 4
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %array = alloca [26 x i32], align 16
+ call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+ %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+ store i32 1, ptr %array
+ call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+ %val = load i32, ptr %array.idx, align 4
+ ret i32 %val
+}
+
+; Used memmove, buffer clobbered by opaque.
+define i32 @used_memmove_3() {
+; CHECK-LABEL: @used_memmove_3(
+; CHECK-NEXT: [[ARRAY:%.*]] = alloca [25 x i32], align 16
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 100, i1 false)
+; CHECK-NEXT: call void @opaque(ptr [[ARRAY]])
+; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 96, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %array = alloca [25 x i32], align 16
+ call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 100, i1 false)
+ call void @opaque(ptr %array)
+ %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+ call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 96, i1 false)
+ %val = load i32, ptr %array, align 16
+ ret i32 %val
+}
+
+; Redundant memmove, not within the same basic block.
+define i32 @redundant_memmove_different_bbs() {
+; CHECK-LABEL: @redundant_memmove_different_bbs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT: br label [[USE:%.*]]
+; CHECK: use:
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %array = alloca [26 x i32], align 16
+ call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+ %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+ br label %use
+
+use: ; preds = %entry
+ call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+ %val = load i32, ptr %array, align 16
+ ret i32 %val
+}
+
+declare void @opaque(ptr)
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
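
(To reproduce locally: this is a regular lit test, so with a configured build
tree something along the lines of

  build/bin/llvm-lit -v llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll

should run it, or opt can be invoked directly as in the RUN line above; the
build/bin path is only an assumption about the default build layout.)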
From 26e1089fbc9144385fb8a94d8dc0098565800ba2 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 20 Sep 2024 17:42:17 +0200
Subject: [PATCH 2/3] [MemCpyOpt] Drop dead `memmove` calls on `memset`'d
source data
When a memmove clobbers its own source data, and that data has previously
been initialized by a memset covering both the source and the destination,
the memmove only copies bytes identical to those already in place, so it is
redundant and may be removed.
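
As a rough C++ analogue of the first test case (@redundant_memmove), the
sketch below mirrors the IR sizes (assuming a 4-byte int) and is illustrative
only, not code from the PR: the memset zero-fills the whole buffer, so the
overlapping in-buffer memmove copies zeros over zeros and is dead.

  #include <cstring>

  int redundant_memmove_example() {
    int array[26];
    // Zero-fill all 104 bytes, i.e. llvm.memset(array, 0, 104).
    std::memset(array, 0, sizeof(array));
    // llvm.memmove(array, array + 4, 100): every copied byte is already 0,
    // so the call has no observable effect and MemCpyOpt may drop it.
    std::memmove(array, array + 1, 25 * sizeof(int));
    return array[0]; // still 0
  }

Once optimized, such libc calls typically become the llvm.memset/llvm.memmove
intrinsics exercised by the test.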
---
.../llvm/Transforms/Scalar/MemCpyOptimizer.h | 3 +-
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 72 ++++++++++++++++++-
.../memset-memmove-redundant-memmove.ll | 2 -
3 files changed, 71 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 8ed03d7f3ddbff..63e6fa8e805eca 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -66,7 +66,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
BasicBlock::iterator &BBI);
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
- bool processMemMove(MemMoveInst *M);
+ bool processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
Align cpyAlign, BatchAAResults &BAA,
@@ -85,6 +85,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
TypeSize Size, BatchAAResults &BAA);
+ bool isMemMoveMemSetDependency(MemMoveInst *M);
void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1d67773585d593..fe5f7c2284c7d0 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -69,6 +69,7 @@ static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
@@ -1843,12 +1844,77 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
return false;
}
+/// Memmove calls with overlapping src/dest buffers that come after a memset may
+/// be removed.
+bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
+ const auto &DL = M->getDataLayout();
+ MemSetInst *MS = nullptr;
+ MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
+ if (!MemMoveAccess)
+ return false;
+
+ BatchAAResults BAA(*AA);
+ MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
+ MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+
+ // The first dominating clobbering MemoryAccess for the source location
+ // needs to be the memset.
+ MemoryAccess *SourceClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, SourceLoc, BAA);
+ if (auto *Def = dyn_cast<MemoryDef>(SourceClobber))
+ MS = dyn_cast_or_null<MemSetInst>(Def->getMemoryInst());
+ if (!MS)
+ return false;
+
+ // The destination buffer must have been memset'd.
+ if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
+ return false;
+
+ // The memmove is of form memmove(x, x + A, B).
+ auto *Source = dyn_cast<GetElementPtrInst>(M->getSource());
+ if (!Source)
+ return false;
+ APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
+ auto MemMoveSize = MemoryLocation::getForSource(M).Size;
+ if (!Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
+ Source->getPointerOperand() != M->getDest() || !MemMoveSize.hasValue())
+ return false;
+
+ LocationSize TotalSize =
+ LocationSize::precise(Offset.getZExtValue() + MemMoveSize.getValue());
+ MemoryLocation CombinedSourceLoc(M->getSource(), TotalSize);
+ MemoryLocation CombinedDestLoc(M->getDest(), TotalSize);
+ if (!isModOrRefSet(BAA.getModRefInfo(MS, CombinedSourceLoc)) ||
+ !isModOrRefSet(BAA.getModRefInfo(MS, CombinedDestLoc)))
+ return false;
+
+ // The first dominating clobbering MemoryAccess for the destination location
+ // needs to be the memset as well.
+ MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, DestLoc, BAA);
+ auto *Def = dyn_cast<MemoryDef>(DestClobber);
+ if (Def->getMemoryInst() != MS)
+ return false;
+ return true;
+}
+
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
-bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
+bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
// See if the source could be modified by this memmove potentially.
- if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
+ // On the off-chance the memmove clobbers src with previously memset'd
+ // bytes, the memmove may be redundant.
+ if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
+ LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
+ ++BBI;
+ eraseInstruction(M);
+ ++NumMemMoveInstr;
+ return true;
+ }
return false;
+ }
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
<< "\n");
@@ -2066,7 +2132,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
else if (auto *M = dyn_cast<MemCpyInst>(I))
RepeatInstruction = processMemCpy(M, BI);
else if (auto *M = dyn_cast<MemMoveInst>(I))
- RepeatInstruction = processMemMove(M);
+ RepeatInstruction = processMemMove(M, BI);
else if (auto *CB = dyn_cast<CallBase>(I)) {
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
if (CB->isByValArgument(i))
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
index 5b323f119f0950..5fcd8c80606a67 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -7,7 +7,6 @@ define i32 @redundant_memmove() {
; CHECK-NEXT: [[ARRAY:%.*]] = alloca [26 x i32], align 16
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
; CHECK-NEXT: ret i32 [[VAL]]
;
@@ -88,7 +87,6 @@ define i32 @redundant_memmove_different_bbs() {
; CHECK-NEXT: [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
; CHECK-NEXT: br label [[USE:%.*]]
; CHECK: use:
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
; CHECK-NEXT: ret i32 [[VAL]]
;
From 693081c95e31222586f96ca369267e750b11dcd9 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Wed, 27 Nov 2024 19:39:31 +0100
Subject: [PATCH 3/3] !fixup single mssa query over the combined dest location
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 47 ++++++++-----------
1 file changed, 19 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index fe5f7c2284c7d0..2a7bce4512bffc 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1848,35 +1848,17 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
/// be removed.
bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
const auto &DL = M->getDataLayout();
- MemSetInst *MS = nullptr;
MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
if (!MemMoveAccess)
return false;
- BatchAAResults BAA(*AA);
- MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
- MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
- MemoryLocation DestLoc = MemoryLocation::getForDest(M);
-
- // The first dominating clobbering MemoryAccess for the source location
- // needs to be the memset.
- MemoryAccess *SourceClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, SourceLoc, BAA);
- if (auto *Def = dyn_cast<MemoryDef>(SourceClobber))
- MS = dyn_cast_or_null<MemSetInst>(Def->getMemoryInst());
- if (!MS)
- return false;
-
- // The destination buffer must have been memset'd.
- if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
- return false;
-
// The memmove is of form memmove(x, x + A, B).
+ MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
auto *Source = dyn_cast<GetElementPtrInst>(M->getSource());
if (!Source)
return false;
APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
- auto MemMoveSize = MemoryLocation::getForSource(M).Size;
+ auto MemMoveSize = SourceLoc.Size;
if (!Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
Source->getPointerOperand() != M->getDest() || !MemMoveSize.hasValue())
return false;
@@ -1885,16 +1867,25 @@ bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
LocationSize::precise(Offset.getZExtValue() + MemMoveSize.getValue());
MemoryLocation CombinedSourceLoc(M->getSource(), TotalSize);
MemoryLocation CombinedDestLoc(M->getDest(), TotalSize);
- if (!isModOrRefSet(BAA.getModRefInfo(MS, CombinedSourceLoc)) ||
- !isModOrRefSet(BAA.getModRefInfo(MS, CombinedDestLoc)))
+
+ // The first dominating clobbering MemoryAccess for the combined location
+ // needs to be a memset.
+ BatchAAResults BAA(*AA);
+ MemSetInst *MS = nullptr;
+ MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
+ MemoryAccess *DestClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ FirstDef, CombinedDestLoc, BAA);
+ if (auto *Def = dyn_cast<MemoryDef>(DestClobber))
+ MS = dyn_cast_or_null<MemSetInst>(Def->getMemoryInst());
+ if (!MS)
return false;
- // The first dominating clobbering MemoryAccess for the destination location
- // needs to be the memset as well.
- MemoryAccess *DestClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, DestLoc, BAA);
- auto *Def = dyn_cast<MemoryDef>(DestClobber);
- if (Def->getMemoryInst() != MS)
+ // The destination buffer must have been memset'd.
+ if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
+ return false;
+
+ if (!isModOrRefSet(BAA.getModRefInfo(MS, CombinedSourceLoc)) ||
+ !isModOrRefSet(BAA.getModRefInfo(MS, CombinedDestLoc)))
return false;
return true;
}