[llvm] [memcpyopt] handle memcpy from memset in more cases (PR #140954)

Jameson Nash via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 2 14:23:45 PDT 2025


https://github.com/vtjnash updated https://github.com/llvm/llvm-project/pull/140954

>From 0d828e6dcb86d886f1e5ee57ec7e71b945bfd36c Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 21 May 2025 15:45:26 +0000
Subject: [PATCH 1/2] [memcpyopt] handle memcpy from memset in more cases

This aims to reduce the divergence between this function and
processMemCpyMemCpyDependence, with the goal of eventually reducing the
duplication between them, combining them, and improving this pass.
---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 103 ++++++++++++------
 .../Transforms/MemCpyOpt/lifetime-missing.ll  |   6 +-
 .../MemCpyOpt/memset-memcpy-oversized.ll      |  48 ++++++++
 .../MemCpyOpt/memset-memcpy-to-2x-memset.ll   |   2 +-
 llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll |   2 +-
 .../MemCpyOpt/variable-sized-memset-memcpy.ll |   4 +-
 6 files changed, 124 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c8cd53d56b56..95d956f18aa69 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1367,8 +1367,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
   return true;
 }
 
-/// Determine whether the instruction has undefined content for the given Size,
-/// either because it was freshly alloca'd or started its lifetime.
+/// Determine whether the pointer V had only undefined content from Def up to
+/// the given Size, either because it was freshly alloca'd or started its
+/// lifetime.
 static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
                              MemoryDef *Def, Value *Size) {
   if (MSSA->isLiveOnEntryDef(Def))
@@ -1403,6 +1404,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
   return false;
 }
 
+static bool coversInputFully(MemorySSA *MSSA, MemCpyInst *MemCpy,
+                             MemIntrinsic *MemSrc, BatchAAResults &BAA) {
+  // If the memcpy is larger than the previous, but the memory was undef prior
+  // to that, we can just ignore the tail. Technically we're only
+  // interested in the bytes from 0..MemSrcOffset and
+  // MemSrcLength+MemSrcOffset..CopySize here, but as we can't easily represent
+  // this location, we use the full 0..CopySize range.
+  Value *CopySize = MemCpy->getLength();
+  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+  MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
+  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+    if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
+      return true;
+  return false;
+}
+
 /// Transform memcpy to memset when its source was just memset.
 /// In other words, turn:
 /// \code
@@ -1418,51 +1437,63 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                                MemSetInst *MemSet,
                                                BatchAAResults &BAA) {
-  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
-  // memcpying from the same address. Otherwise it is hard to reason about.
-  if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
-    return false;
-
   Value *MemSetSize = MemSet->getLength();
   Value *CopySize = MemCpy->getLength();
 
-  if (MemSetSize != CopySize) {
-    // Make sure the memcpy doesn't read any more than what the memset wrote.
-    // Don't worry about sizes larger than i64.
-
-    // A known memset size is required.
-    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
-    if (!CMemSetSize)
+  int64_t MOffset = 0;
+  const DataLayout &DL = MemCpy->getModule()->getDataLayout();
+  // We can only transforms memcpy's where the dest of one is the source of the
+  // other, or they have a known offset.
+  if (MemCpy->getSource() != MemSet->getDest()) {
+    std::optional<int64_t> Offset =
+        MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
+    if (!Offset)
       return false;
+    MOffset = *Offset;
+  }
 
-    // A known memcpy size is also required.
+  MaybeAlign MDestAlign = MemCpy->getDestAlign();
+  int64_t MOffsetAligned = MDestAlign.valueOrOne().value() > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne().value() - 1)) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
+  if (MOffset != 0 || MemSetSize != CopySize) {
+    // Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
+    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
     auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
-    if (!CCopySize)
-      return false;
-    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
-      // If the memcpy is larger than the memset, but the memory was undef prior
-      // to the memset, we can just ignore the tail. Technically we're only
-      // interested in the bytes from MemSetSize..CopySize here, but as we can't
-      // easily represent this location, we use the full 0..CopySize range.
-      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
-      bool CanReduceSize = false;
-      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
-      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
-      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
-        if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
-          CanReduceSize = true;
-
-      if (!CanReduceSize)
+    // Don't worry about sizes larger than i64.
+    if (!CMemSetSize || !CCopySize || MOffset < 0 ||
+        CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
+      if (!coversInputFully(MSSA, MemCpy, MemSet, BAA))
         return false;
-      CopySize = MemSetSize;
+
+      if (CMemSetSize && CCopySize) {
+        // If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
+        if (CCopySize->getZExtValue() + std::abs(MOffset) > CMemSetSize->getZExtValue()) {
+          if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()))
+            CopySize = MemSetSize;
+          else
+            CopySize = ConstantInt::get(CopySize->getType(), std::max((int64_t)0, (int64_t)(CMemSetSize->getZExtValue() - std::abs(MOffsetAligned))));
+        }
+        else if (MOffsetAligned < 0) {
+          // Even if CMemSetSize isn't known, if the MOffsetAligned is negative, make sure to clip the new memset
+          CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
+        }
+      }
+      else if (CCopySize && MOffsetAligned < 0) {
+        // Even if CMemSetSize isn't known, if the MOffsetAligned is negative, can still clip the new memset
+        CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
+      }
+      else {
+        MOffsetAligned = 0;
+      }
     }
   }
 
   IRBuilder<> Builder(MemCpy);
+  Value *MDest = MemCpy->getRawDest();
+  if (MOffsetAligned < 0)
+    MDest = Builder.CreateInBoundsPtrAdd(MDest, Builder.getInt64(-MOffsetAligned));
   Instruction *NewM =
-      Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
-                           CopySize, MemCpy->getDestAlign());
+      Builder.CreateMemSet(MDest, MemSet->getOperand(1),
+                           CopySize, MDestAlign);
   auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
   auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
   MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
@@ -1683,7 +1714,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
   }
 
-  LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
+  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
   NumStackMove++;
   return true;
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
index 0626f09702f7e..3ad49a63b357a 100644
--- a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
+++ b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
@@ -14,8 +14,12 @@ define void @test() {
 ; CHECK-LABEL: define void @test() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4
-; CHECK-NEXT:    [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
+; CHECK-NEXT:    [[AGG_TMP_SROA_15:%.*]] = alloca [20 x i8], align 4
+; CHECK-NEXT:    [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_15]], i64 4
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]])
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[AGG_TMP_SROA_14]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[TMP0]], i8 0, i64 1, i1 false)
 ; CHECK-NEXT:    [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr null, i8 0, i64 1, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
index 1c3896407e950..424cdde5fa780 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
@@ -187,6 +187,54 @@ define void @test_write_before_memset_in_both_regions(ptr %result) {
   ret void
 }
 
+define void @test_offset_memset(ptr %result) {
+; CHECK-LABEL: @test_offset_memset(
+; CHECK-NEXT:    [[A1:%.*]] = alloca [4 x i32], align 8
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[RESULT:%.*]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[TMP1]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [ 4 x i32 ], align 8
+  %b = getelementptr i32, ptr %a, i32 1
+  call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 12, i1 false)
+  ret void
+}
+
+define void @test_offset_memsetcpy(ptr %result) {
+; CHECK-LABEL: @test_offset_memsetcpy(
+; CHECK-NEXT:    [[A1:%.*]] = alloca [4 x i32], align 8
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A1]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [ 4 x i32 ], align 8
+  %b = getelementptr i32, ptr %a, i32 1
+  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %b, i64 12, i1 false)
+  ret void
+}
+
+define void @test_two_memset(ptr %result) {
+; CHECK-LABEL: @test_two_memset(
+; CHECK-NEXT:    [[A:%.*]] = alloca [4 x i32], align 8
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[A]], i32 3
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[B]], i8 1, i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [ 4 x i32 ], align 8
+  %b = getelementptr i32, ptr %a, i32 3
+  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
+  call void @llvm.memset.p0.i64(ptr align 8 %b, i8 1, i64 4, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
+  ret void
+}
+
 declare ptr @malloc(i64)
 declare void @free(ptr)
 
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
index 47474e8dac051..dc55d0524ddfb 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
@@ -73,7 +73,7 @@ define void @test_different_source_gep(ptr %dst1, ptr %dst2, i8 %c) {
 ; CHECK-LABEL: @test_different_source_gep(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false)
 ; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, ptr [[DST1]], i64 64
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DST2:%.*]], ptr [[P]], i64 64, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DST2:%.*]], i8 [[C]], i64 64, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memset.p0.i64(ptr %dst1, i8 %c, i64 128, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
index 5e13432746bf7..0e312bc42d463 100644
--- a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
+++ b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
@@ -19,7 +19,7 @@ define i32 @foo(i1 %z) {
 ; CHECK:       for.body3.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_INC7_1]]
 ; CHECK:       for.inc7.1:
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[SCEVGEP]], i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
index a834d2465dfa5..d739d53d8a62c 100644
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
@@ -18,13 +18,13 @@ define void @test(ptr %src, i8 %c, i64 %size) {
   ret void
 }
 
-; Differing sizes, so left as it is.
+; Differing sizes, but would be UB if size1 > size2
 define void @negative_test(ptr %src, i8 %c, i64 %size1, i64 %size2) {
 ; CHECK-LABEL: @negative_test(
 ; CHECK-NEXT:    [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
 ; CHECK-NEXT:    [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[DST1]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST2]], i8 [[C]], i64 [[SIZE2]], i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %dst1 = alloca i8, i64 %size1

>From bd2e8871c5b70e258f365a425fca980ed1b2d01a Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Mon, 2 Jun 2025 20:43:52 +0000
Subject: [PATCH 2/2] remove handling of undef input ranges (incl offsets < 0)

---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 50 ++++++++-----------
 .../Transforms/MemCpyOpt/lifetime-missing.ll  |  6 +--
 .../MemCpyOpt/memset-memcpy-oversized.ll      | 31 ++++++------
 .../MemCpyOpt/variable-sized-memset-memcpy.ll |  4 +-
 4 files changed, 38 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 95d956f18aa69..f6dbfd935a211 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1443,54 +1443,44 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   int64_t MOffset = 0;
   const DataLayout &DL = MemCpy->getModule()->getDataLayout();
   // We can only transforms memcpy's where the dest of one is the source of the
-  // other, or they have a known offset.
+  // other, or the memory transfer has a known offset from the memset.
   if (MemCpy->getSource() != MemSet->getDest()) {
     std::optional<int64_t> Offset =
         MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
-    if (!Offset)
+    if (!Offset || *Offset < 0)
       return false;
     MOffset = *Offset;
   }
 
   MaybeAlign MDestAlign = MemCpy->getDestAlign();
-  int64_t MOffsetAligned = MDestAlign.valueOrOne().value() > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne().value() - 1)) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
   if (MOffset != 0 || MemSetSize != CopySize) {
-    // Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
+    // Make sure the memcpy doesn't read any more than what the memset wrote,
+    // other than undef. Don't worry about sizes larger than i64. A known memset
+    // size is required.
     auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
+    if (!CMemSetSize)
+      return false;
+    // A known memcpy size is required.
     auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
-    // Don't worry about sizes larger than i64.
-    if (!CMemSetSize || !CCopySize || MOffset < 0 ||
-        CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
+    if (!CCopySize)
+      return false;
+    if (CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
       if (!coversInputFully(MSSA, MemCpy, MemSet, BAA))
         return false;
-
-      if (CMemSetSize && CCopySize) {
-        // If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
-        if (CCopySize->getZExtValue() + std::abs(MOffset) > CMemSetSize->getZExtValue()) {
-          if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()))
-            CopySize = MemSetSize;
-          else
-            CopySize = ConstantInt::get(CopySize->getType(), std::max((int64_t)0, (int64_t)(CMemSetSize->getZExtValue() - std::abs(MOffsetAligned))));
-        }
-        else if (MOffsetAligned < 0) {
-          // Even if CMemSetSize isn't known, if the MOffsetAligned is negative, make sure to clip the new memset
-          CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
-        }
-      }
-      else if (CCopySize && MOffsetAligned < 0) {
-        // Even if CMemSetSize isn't known, if the MOffsetAligned is negative, can still clip the new memset
-        CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
-      }
-      else {
-        MOffsetAligned = 0;
-      }
+      // Clip the memcpy to the bounds of the memset
+      if (MOffset == 0)
+        CopySize = MemSetSize;
+      else
+        CopySize =
+            ConstantInt::get(CopySize->getType(),
+                             CMemSetSize->getZExtValue() <= (uint64_t)MOffset
+                                 ? 0
+                                 : CMemSetSize->getZExtValue() - MOffset);
     }
   }
 
   IRBuilder<> Builder(MemCpy);
   Value *MDest = MemCpy->getRawDest();
-  if (MOffsetAligned < 0)
-    MDest = Builder.CreateInBoundsPtrAdd(MDest, Builder.getInt64(-MOffsetAligned));
   Instruction *NewM =
       Builder.CreateMemSet(MDest, MemSet->getOperand(1),
                            CopySize, MDestAlign);
diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
index 3ad49a63b357a..0626f09702f7e 100644
--- a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
+++ b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
@@ -14,12 +14,8 @@ define void @test() {
 ; CHECK-LABEL: define void @test() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4
-; CHECK-NEXT:    [[AGG_TMP_SROA_15:%.*]] = alloca [20 x i8], align 4
-; CHECK-NEXT:    [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_15]], i64 4
+; CHECK-NEXT:    [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false)
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]])
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[AGG_TMP_SROA_14]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[TMP0]], i8 0, i64 1, i1 false)
 ; CHECK-NEXT:    [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr null, i8 0, i64 1, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
index 424cdde5fa780..0c16f34590fc7 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
@@ -187,17 +187,16 @@ define void @test_write_before_memset_in_both_regions(ptr %result) {
   ret void
 }
 
-define void @test_offset_memset(ptr %result) {
-; CHECK-LABEL: @test_offset_memset(
-; CHECK-NEXT:    [[A1:%.*]] = alloca [4 x i32], align 8
-; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
+define void @test_negative_offset_memset(ptr %result) {
+; CHECK-LABEL: @test_negative_offset_memset(
+; CHECK-NEXT:    [[A1:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i8, ptr [[A1]], i32 4
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[RESULT:%.*]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[TMP1]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A1]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
-  %a = alloca [ 4 x i32 ], align 8
-  %b = getelementptr i32, ptr %a, i32 1
+  %a = alloca [ 16 x i8 ], align 8
+  %b = getelementptr i8, ptr %a, i32 4
   call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 12, i1 false)
   call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 12, i1 false)
   ret void
@@ -205,14 +204,14 @@ define void @test_offset_memset(ptr %result) {
 
 define void @test_offset_memsetcpy(ptr %result) {
 ; CHECK-LABEL: @test_offset_memsetcpy(
-; CHECK-NEXT:    [[A1:%.*]] = alloca [4 x i32], align 8
-; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
+; CHECK-NEXT:    [[A1:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i8, ptr [[A1]], i32 4
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A1]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 8, i1 false)
 ; CHECK-NEXT:    ret void
 ;
-  %a = alloca [ 4 x i32 ], align 8
-  %b = getelementptr i32, ptr %a, i32 1
+  %a = alloca [ 16 x i8 ], align 8
+  %b = getelementptr i8, ptr %a, i32 4
   call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
   call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %b, i64 12, i1 false)
   ret void
@@ -220,15 +219,15 @@ define void @test_offset_memsetcpy(ptr %result) {
 
 define void @test_two_memset(ptr %result) {
 ; CHECK-LABEL: @test_two_memset(
-; CHECK-NEXT:    [[A:%.*]] = alloca [4 x i32], align 8
-; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[A]], i32 3
+; CHECK-NEXT:    [[A:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i8, ptr [[A]], i32 12
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[B]], i8 1, i64 4, i1 false)
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
-  %a = alloca [ 4 x i32 ], align 8
-  %b = getelementptr i32, ptr %a, i32 3
+  %a = alloca [ 16 x i8 ], align 8
+  %b = getelementptr i8, ptr %a, i32 12
   call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
   call void @llvm.memset.p0.i64(ptr align 8 %b, i8 1, i64 4, i1 false)
   call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
index d739d53d8a62c..d5b1ab9b2f299 100644
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
@@ -18,13 +18,13 @@ define void @test(ptr %src, i8 %c, i64 %size) {
   ret void
 }
 
-; Differing sizes, but would be UB if size1 > size2
+; Differing sizes, but would be UB if size1 < size2 since the memcpy would reference outside of the first alloca
 define void @negative_test(ptr %src, i8 %c, i64 %size1, i64 %size2) {
 ; CHECK-LABEL: @negative_test(
 ; CHECK-NEXT:    [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
 ; CHECK-NEXT:    [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST2]], i8 [[C]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[DST1]], i64 [[SIZE2]], i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %dst1 = alloca i8, i64 %size1



More information about the llvm-commits mailing list