[llvm] [MemCpyOpt] Drop dead `memmove` calls on `memset`'d source data (PR #101930)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 30 09:46:18 PST 2024


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/101930

>From 2ee9f1854eb7dc95f3531024d57567646ad74fa2 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 20 Sep 2024 17:41:06 +0200
Subject: [PATCH 1/6] [MemCpyOpt] Introduce test for PR101930 (NFC)

---
 .../memset-memmove-redundant-memmove.ll       | 145 ++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll

diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
new file mode 100644
index 00000000000000..59eb8e1c2f2dd0
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s
+
+; Redundant memmove.
+define i32 @redundant_memmove() {
+; CHECK-LABEL: @redundant_memmove(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [26 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+  %val = load i32, ptr %array, align 16
+  ret i32 %val
+}
+
+; Used memmove, buffer is reset to zero.
+define i32 @used_memmove_1() {
+; CHECK-LABEL: @used_memmove_1(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    store i32 1, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [26 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+  store i32 1, ptr %array.idx
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+  %val = load i32, ptr %array.idx, align 4
+  ret i32 %val
+}
+
+; Used memmove.
+define i32 @used_memmove_2() {
+; CHECK-LABEL: @used_memmove_2(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    store i32 1, ptr [[ARRAY]], align 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [26 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+  store i32 1, ptr %array
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+  %val = load i32, ptr %array.idx, align 4
+  ret i32 %val
+}
+
+; Used memmove, buffer clobbered by opaque.
+define i32 @used_memmove_3() {
+; CHECK-LABEL: @used_memmove_3(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [25 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 100, i1 false)
+; CHECK-NEXT:    call void @opaque(ptr [[ARRAY]])
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 96, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [25 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 100, i1 false)
+  call void @opaque(ptr %array)
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 96, i1 false)
+  %val = load i32, ptr %array, align 16
+  ret i32 %val
+}
+
+; Redundant memmove, not within the same basic block.
+define i32 @redundant_memmove_different_bbs() {
+; CHECK-LABEL: @redundant_memmove_different_bbs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    br label [[USE:%.*]]
+; CHECK:       use:
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+entry:
+  %array = alloca [26 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+  br label %use
+
+use:                                              ; preds = %entry
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+  %val = load i32, ptr %array, align 16
+  ret i32 %val
+}
+
+ at g_var = global [26 x i32] zeroinitializer, align 16
+
+; Redundant memmove on a global variable.
+define ptr @redundant_memmove_memset_global_variable() {
+; CHECK-LABEL: @redundant_memmove_memset_global_variable(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false)
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false)
+; CHECK-NEXT:    ret ptr @g_var
+;
+  call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false)
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false)
+  ret ptr @g_var
+}
+
+; Memset only partial.
+define i32 @partial_memset() {
+; CHECK-LABEL: @partial_memset(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 92
+; CHECK-NEXT:    store i32 1, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 26, i1 false)
+; CHECK-NEXT:    [[ARRAY_IDX_2:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX_2]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY_IDX]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [26 x i32], align 16
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 92
+  store i32 1, ptr %array.idx
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 26, i1 false)
+  %array.idx.2 = getelementptr inbounds i8, ptr %array, i64 4
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx.2, i64 100, i1 false)
+  %val = load i32, ptr %array.idx, align 4
+  ret i32 %val
+}
+
+declare void @opaque(ptr)
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)

>From aaaa51ffb611dc8f7e1086ff8ea4b17f9163ec40 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 20 Sep 2024 17:42:17 +0200
Subject: [PATCH 2/6] [MemCpyOpt] Drop dead `memmove` calls on `memset`'d
 source data

When a memmove happens to clobber source data, and such data have
been previously memset'd, the memmove may be redundant.
---
 .../llvm/Transforms/Scalar/MemCpyOptimizer.h  |  3 +-
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 82 ++++++++++++++++++-
 .../memset-memmove-redundant-memmove.ll       |  3 -
 3 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 023c9de28209c8..496d2958fc2d0f 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -68,7 +68,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
                           BasicBlock::iterator &BBI);
   bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
   bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
-  bool processMemMove(MemMoveInst *M);
+  bool processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI);
   bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
                             Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
                             Align cpyAlign, BatchAAResults &BAA,
@@ -87,6 +87,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
   bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
                              AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
                              TypeSize Size, BatchAAResults &BAA);
+  bool isMemMoveMemSetDependency(MemMoveInst *M);
 
   void eraseInstruction(Instruction *I);
   bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e9e1071ea210c4..2011e6e42a25ce 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -68,6 +68,7 @@ static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
     cl::desc("Enable memcpyopt even when libcalls are disabled"));
 
 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
@@ -1841,12 +1842,87 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   return false;
 }
 
+/// Memmove calls with overlapping src/dest buffers that come after a memset may
+/// be removed.
+bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
+  const auto &DL = M->getDataLayout();
+  MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
+  if (!MemMoveAccess)
+    return false;
+
+  // The memmove is of form memmove(x, x + A, B).
+  MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
+  auto *MemMoveSourceOp = M->getSource();
+  auto *Source = dyn_cast<GetElementPtrInst>(MemMoveSourceOp);
+  bool MemMoveHasGEPOperator = false;
+  if (!Source) {
+    if (auto *CE = dyn_cast<ConstantExpr>(MemMoveSourceOp))
+      if (isa<GEPOperator>(CE)) {
+        Source = cast<GetElementPtrInst>(CE->getAsInstruction());
+        MemMoveHasGEPOperator = true;
+      }
+    if (!Source)
+      return false;
+  }
+
+  APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
+  LocationSize MemMoveLocSize = SourceLoc.Size;
+  if (Source->getPointerOperand() != M->getDest() ||
+      !MemMoveLocSize.hasValue() || Offset.isNegative() ||
+      !Source->accumulateConstantOffset(DL, Offset)) {
+    if (MemMoveHasGEPOperator)
+      Source->dropAllReferences();
+    return false;
+  }
+
+  if (MemMoveHasGEPOperator)
+    Source->dropAllReferences();
+
+  const uint64_t MemMoveSize = MemMoveLocSize.getValue();
+  LocationSize TotalSize =
+      LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
+  MemoryLocation CombinedSourceLoc(MemMoveSourceOp, TotalSize);
+  MemoryLocation CombinedDestLoc(M->getDest(), TotalSize);
+
+  // The first dominating clobbering MemoryAccess for the combined location
+  // needs to be a memset.
+  BatchAAResults BAA(*AA);
+  MemSetInst *MS = nullptr;
+  MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
+  MemoryAccess *DestClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      FirstDef, CombinedDestLoc, BAA);
+  if (auto *Def = dyn_cast<MemoryDef>(DestClobber))
+    MS = dyn_cast_or_null<MemSetInst>(Def->getMemoryInst());
+  if (!MS)
+    return false;
+
+  // Memset length must be sufficiently large.
+  if (cast<ConstantInt>(MS->getLength())->getZExtValue() < MemMoveSize)
+    return false;
+
+  // The destination buffer must have been memset'd.
+  if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
+    return false;
+
+  return true;
+}
+
 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
 /// not to alias.
-bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
+bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
   // See if the source could be modified by this memmove potentially.
-  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
+  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
+    // On the off-chance the memmove clobbers src with previously memset'd
+    // bytes, the memmove may be redundant.
+    if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
+      LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
+      ++BBI;
+      eraseInstruction(M);
+      ++NumMemMoveInstr;
+      return true;
+    }
     return false;
+  }
 
   LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
                     << "\n");
@@ -2064,7 +2140,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
       else if (auto *M = dyn_cast<MemCpyInst>(I))
         RepeatInstruction = processMemCpy(M, BI);
       else if (auto *M = dyn_cast<MemMoveInst>(I))
-        RepeatInstruction = processMemMove(M);
+        RepeatInstruction = processMemMove(M, BI);
       else if (auto *CB = dyn_cast<CallBase>(I)) {
         for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
           if (CB->isByValArgument(i))
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
index 59eb8e1c2f2dd0..f0324529e6335c 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -7,7 +7,6 @@ define i32 @redundant_memmove() {
 ; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
 ; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
-; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
 ; CHECK-NEXT:    ret i32 [[VAL]]
 ;
@@ -88,7 +87,6 @@ define i32 @redundant_memmove_different_bbs() {
 ; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
 ; CHECK-NEXT:    br label [[USE:%.*]]
 ; CHECK:       use:
-; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
 ; CHECK-NEXT:    ret i32 [[VAL]]
 ;
@@ -110,7 +108,6 @@ use:                                              ; preds = %entry
 define ptr @redundant_memmove_memset_global_variable() {
 ; CHECK-LABEL: @redundant_memmove_memset_global_variable(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false)
-; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 @g_var, ptr align 4 getelementptr inbounds nuw (i8, ptr @g_var, i64 4), i64 100, i1 false)
 ; CHECK-NEXT:    ret ptr @g_var
 ;
   call void @llvm.memset.p0.i64(ptr align 16 @g_var, i8 0, i64 104, i1 false)

>From 35100f77c3842e43f7c4a99f2a6894e26d69b29d Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Nov 2024 17:13:24 +0100
Subject: [PATCH 3/6] !fixup use gepoperator

---
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2011e6e42a25ce..4a5a72285c240b 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1853,31 +1853,18 @@ bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
   // The memmove is of form memmove(x, x + A, B).
   MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
   auto *MemMoveSourceOp = M->getSource();
-  auto *Source = dyn_cast<GetElementPtrInst>(MemMoveSourceOp);
-  bool MemMoveHasGEPOperator = false;
-  if (!Source) {
-    if (auto *CE = dyn_cast<ConstantExpr>(MemMoveSourceOp))
-      if (isa<GEPOperator>(CE)) {
-        Source = cast<GetElementPtrInst>(CE->getAsInstruction());
-        MemMoveHasGEPOperator = true;
-      }
-    if (!Source)
+  auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
+  if (!Source)
       return false;
-  }
 
   APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
   LocationSize MemMoveLocSize = SourceLoc.Size;
   if (Source->getPointerOperand() != M->getDest() ||
       !MemMoveLocSize.hasValue() || Offset.isNegative() ||
       !Source->accumulateConstantOffset(DL, Offset)) {
-    if (MemMoveHasGEPOperator)
-      Source->dropAllReferences();
     return false;
   }
 
-  if (MemMoveHasGEPOperator)
-    Source->dropAllReferences();
-
   const uint64_t MemMoveSize = MemMoveLocSize.getValue();
   LocationSize TotalSize =
       LocationSize::precise(Offset.getZExtValue() + MemMoveSize);

>From fcca0367c942192ebc8dc105a4932d964a9ba72c Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Nov 2024 17:20:41 +0100
Subject: [PATCH 4/6] !fixup clang-format

---
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4a5a72285c240b..5d99e9ea654bdc 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1855,7 +1855,7 @@ bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
   auto *MemMoveSourceOp = M->getSource();
   auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
   if (!Source)
-      return false;
+    return false;
 
   APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
   LocationSize MemMoveLocSize = SourceLoc.Size;

>From 5a79e0ec9966e9e2720b08ccde49a404dd83235b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Nov 2024 18:50:31 +0100
Subject: [PATCH 5/6] !fixup dyn_cast + test

---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 21 +++++++++++--------
 .../memset-memmove-redundant-memmove.ll       | 18 ++++++++++++++++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 5d99e9ea654bdc..f9d094044b6cc7 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1860,12 +1860,12 @@ bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
   APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
   LocationSize MemMoveLocSize = SourceLoc.Size;
   if (Source->getPointerOperand() != M->getDest() ||
-      !MemMoveLocSize.hasValue() || Offset.isNegative() ||
-      !Source->accumulateConstantOffset(DL, Offset)) {
+      !MemMoveLocSize.hasValue() ||
+      !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
     return false;
   }
 
-  const uint64_t MemMoveSize = MemMoveLocSize.getValue();
+  uint64_t MemMoveSize = MemMoveLocSize.getValue();
   LocationSize TotalSize =
       LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
   MemoryLocation CombinedSourceLoc(MemMoveSourceOp, TotalSize);
@@ -1874,17 +1874,20 @@ bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
   // The first dominating clobbering MemoryAccess for the combined location
   // needs to be a memset.
   BatchAAResults BAA(*AA);
-  MemSetInst *MS = nullptr;
   MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
-  MemoryAccess *DestClobber = MSSA->getWalker()->getClobberingMemoryAccess(
-      FirstDef, CombinedDestLoc, BAA);
-  if (auto *Def = dyn_cast<MemoryDef>(DestClobber))
-    MS = dyn_cast_or_null<MemSetInst>(Def->getMemoryInst());
+  auto *DestClobber =
+      dyn_cast<MemoryDef>(MSSA->getWalker()->getClobberingMemoryAccess(
+          FirstDef, CombinedDestLoc, BAA));
+  if (!DestClobber)
+    return false;
+
+  auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
   if (!MS)
     return false;
 
   // Memset length must be sufficiently large.
-  if (cast<ConstantInt>(MS->getLength())->getZExtValue() < MemMoveSize)
+  auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
+  if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
     return false;
 
   // The destination buffer must have been memset'd.
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
index f0324529e6335c..83c5c235e5ab13 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -137,6 +137,24 @@ define i32 @partial_memset() {
   ret i32 %val
 }
 
+; Memset length not constant.
+define i32 @memset_length_not_constant(i64 %size) {
+; CHECK-LABEL: @memset_length_not_constant(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 [[SIZE:%.*]], i1 false)
+; CHECK-NEXT:    [[ARRAY_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY]], ptr align 4 [[ARRAY_IDX]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY]], align 16
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [26 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 %size, i1 false)
+  %array.idx = getelementptr inbounds i8, ptr %array, i64 4
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array, ptr align 4 %array.idx, i64 100, i1 false)
+  %val = load i32, ptr %array, align 16
+  ret i32 %val
+}
+
 declare void @opaque(ptr)
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
 declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)

>From 1db88b4e19ba6e8456e7fd02ec00f9a62faed3cc Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Nov 2024 18:45:08 +0100
Subject: [PATCH 6/6] !fixup negative mustalias test

---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp |  8 +++-----
 .../memset-memmove-redundant-memmove.ll       | 20 +++++++++++++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index f9d094044b6cc7..0cba5d077da62b 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1868,16 +1868,14 @@ bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
   uint64_t MemMoveSize = MemMoveLocSize.getValue();
   LocationSize TotalSize =
       LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
-  MemoryLocation CombinedSourceLoc(MemMoveSourceOp, TotalSize);
-  MemoryLocation CombinedDestLoc(M->getDest(), TotalSize);
+  MemoryLocation CombinedLoc(M->getDest(), TotalSize);
 
   // The first dominating clobbering MemoryAccess for the combined location
   // needs to be a memset.
   BatchAAResults BAA(*AA);
   MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
-  auto *DestClobber =
-      dyn_cast<MemoryDef>(MSSA->getWalker()->getClobberingMemoryAccess(
-          FirstDef, CombinedDestLoc, BAA));
+  auto *DestClobber = dyn_cast<MemoryDef>(
+      MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
   if (!DestClobber)
     return false;
 
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
index 83c5c235e5ab13..c7593e2941518a 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memmove-redundant-memmove.ll
@@ -155,6 +155,26 @@ define i32 @memset_length_not_constant(i64 %size) {
   ret i32 %val
 }
 
+; Memmove buffer not memset'd, different buffers.
+define i32 @memset_memmove_dest_buffers_not_alias() {
+; CHECK-LABEL: @memset_memmove_dest_buffers_not_alias(
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    [[ARRAY2:%.*]] = alloca [26 x i32], align 16
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ARRAY]], i8 0, i64 104, i1 false)
+; CHECK-NEXT:    [[ARRAY2_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY2]], i64 4
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 16 [[ARRAY2]], ptr align 4 [[ARRAY2_IDX]], i64 100, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAY2]], align 16
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %array = alloca [26 x i32], align 16
+  %array2 = alloca [26 x i32], align 16
+  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 104, i1 false)
+  %array2.idx = getelementptr inbounds i8, ptr %array2, i64 4
+  call void @llvm.memmove.p0.p0.i64(ptr align 16 %array2, ptr align 4 %array2.idx, i64 100, i1 false)
+  %val = load i32, ptr %array2, align 16
+  ret i32 %val
+}
+
 declare void @opaque(ptr)
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
 declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)



More information about the llvm-commits mailing list