[llvm] [MemCpyOpt] allow more memcpy-to-memcpy optimization (PR #150792)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 26 12:13:55 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Jameson Nash (vtjnash)

<details>
<summary>Changes</summary>

Allow the memcpy-to-memcpy optimization even when the sizes are not identical. For example, the copy might have been generated as a small slice of a larger struct (currently this is only handled at offset zero), or it might store to only part of an oversized alloca.

---

Patch is 20.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150792.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (+97-25) 
- (modified) llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll (+2-5) 
- (modified) llvm/test/Transforms/MemCpyOpt/stack-move.ll (+81-58) 
- (modified) llvm/test/Transforms/MemCpyOpt/stackrestore.ll (+2-3) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9220abb974d21..a8273321b6deb 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -1395,8 +1396,10 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
   if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
       auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+      if (LTSize->getZExtValue() == (uint64_t)-1)
+        return true;
 
-      if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
+      if (auto *CSize = dyn_cast_or_null<ConstantInt>(Size)) {
         if (AA.isMustAlias(V, II->getArgOperand(1)) &&
             LTSize->getZExtValue() >= CSize->getZExtValue())
           return true;
@@ -1439,6 +1442,30 @@ static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
   return false;
 }
 
+// If only the MemSrc instruction is known, a similar but slightly weaker
+// analysis can apply
+static bool allOverreadUndefContents(MemorySSA *MSSA, Instruction *Store,
+                                     BatchAAResults &BAA) {
+  MemoryLocation Loc;
+  Value *Ptr;
+  if (auto SI = dyn_cast<StoreInst>(Store)) {
+    Loc = MemoryLocation::get(SI);
+    Ptr = SI->getPointerOperand();
+  } else if (auto MI = dyn_cast<MemCpyInst>(Store)) {
+    Loc = MemoryLocation::getForDest(MI);
+    Ptr = MI->getDest();
+  } else {
+    llvm_unreachable("performStackMoveOptzn must have a known store kind");
+  }
+  MemoryUseOrDef *MemAccess = MSSA->getMemoryAccess(Store);
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      MemAccess->getDefiningAccess(), Loc, BAA);
+  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+    if (hasUndefContents(MSSA, BAA, Ptr, MD, nullptr))
+      return true;
+  return false;
+}
+
 /// Transform memcpy to memset when its source was just memset.
 /// In other words, turn:
 /// \code
@@ -1532,21 +1559,43 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return false;
   }
 
-  // Check that copy is full with static size.
-  const DataLayout &DL = DestAlloca->getDataLayout();
-  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
-  if (!SrcSize || Size != *SrcSize) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
-    return false;
-  }
-  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
-  if (!DestSize || Size != *DestSize) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+  if (SrcAlloca->isUsedWithInAlloca() || DestAlloca->isUsedWithInAlloca())
     return false;
-  }
 
-  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
-    return false;
+  Type *SrcType = SrcAlloca->getAllocatedType();
+  Type *DestType = DestAlloca->getAllocatedType();
+  // If they don't have common type, then they will need to be converted to a
+  // common size at runtime
+  const auto &DL = SrcAlloca->getDataLayout();
+  TypeSize SrcSize = DL.getTypeAllocSize(SrcType);
+  TypeSize DestSize = DL.getTypeAllocSize(DestType);
+  if (SrcType != DestType)
+    if (SrcSize != DestSize)
+      if (!SrcSize.isFixed() || !DestSize.isFixed())
+        return false;
+
+  // Check that copy is full with dest size, either because it wrote every byte,
+  // or it was fresh.
+  std::optional<TypeSize> FullSize = DestAlloca->getAllocationSize(DL);
+  if (!FullSize || Size != *FullSize)
+    if (!allOverreadUndefContents(MSSA, Store, BAA)) {
+      LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+      return false;
+    }
+
+  // Check if it will be legal to combine allocas without breaking dominator.
+  // TODO: Try to hoist the arguments (recursively) instead of giving up
+  // immediately.
+  bool MoveSrc = !DT->dominates(SrcAlloca, DestAlloca);
+  if (MoveSrc) {
+    if (!DT->dominates(DestAlloca, SrcAlloca))
+      return false;
+    if (!DT->dominates(SrcAlloca->getArraySize(), DestAlloca))
+      return false;
+  } else {
+    if (!DT->dominates(DestAlloca->getArraySize(), SrcAlloca))
+      return false;
+  }
 
   // Check that src and dest are never captured, unescaped allocas. Also
   // find the nearest common dominator and postdominator for all users in
@@ -1555,7 +1604,6 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
 
   SmallVector<Instruction *, 4> LifetimeMarkers;
   SmallSet<Instruction *, 4> AAMetadataInstrs;
-  bool SrcNotDom = false;
 
   auto CaptureTrackingWithModRef =
       [&](Instruction *AI,
@@ -1569,10 +1617,6 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
       Instruction *I = Worklist.pop_back_val();
       for (const Use &U : I->uses()) {
         auto *UI = cast<Instruction>(U.getUser());
-        // If any use that isn't dominated by SrcAlloca exists, we move src
-        // alloca to the entry before the transformation.
-        if (!DT->dominates(SrcAlloca, UI))
-          SrcNotDom = true;
 
         if (Visited.size() >= MaxUsesToExplore) {
           LLVM_DEBUG(
@@ -1680,15 +1724,43 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
     return false;
 
-  // We can do the transformation. First, move the SrcAlloca to the start of the
-  // BB.
-  if (SrcNotDom)
-    SrcAlloca->moveBefore(*SrcAlloca->getParent(),
-                          SrcAlloca->getParent()->getFirstInsertionPt());
+  // We can now do the transformation. First move the Src if it was after Dest.
+  if (MoveSrc)
+    SrcAlloca->moveBefore(DestAlloca->getIterator());
+
   // Align the allocas appropriately.
   SrcAlloca->setAlignment(
       std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
 
+  // Size the allocas appropriately.
+  Value *SrcArraySize = SrcAlloca->getArraySize();
+  Value *DestArraySize = DestAlloca->getArraySize();
+  IRBuilder<InstSimplifyFolder> Builder(SrcAlloca->getContext(),
+                                        InstSimplifyFolder(DL));
+  Builder.SetInsertPoint(SrcAlloca);
+  Type *Int32Ty = Builder.getInt32Ty();
+  if (SrcType != DestType && SrcSize != DestSize) {
+    SrcAlloca->setAllocatedType(Type::getInt8Ty(Load->getContext()));
+    if (SrcArraySize->getType() != Int32Ty)
+      SrcArraySize = Builder.CreateZExtOrTrunc(SrcArraySize, Int32Ty);
+    if (DestArraySize->getType() != Int32Ty)
+      DestArraySize = Builder.CreateZExtOrTrunc(DestArraySize, Int32Ty);
+    SrcArraySize = Builder.CreateMul(
+        SrcArraySize, ConstantInt::get(Int32Ty, SrcSize.getFixedValue()), "",
+        true, true);
+    DestArraySize = Builder.CreateMul(
+        DestArraySize, ConstantInt::get(Int32Ty, DestSize.getFixedValue()), "",
+        true, true);
+  }
+  if (SrcArraySize != DestArraySize) {
+    if (SrcArraySize->getType() != DestArraySize->getType()) {
+      SrcArraySize = Builder.CreateZExtOrTrunc(SrcArraySize, Int32Ty);
+      DestArraySize = Builder.CreateZExtOrTrunc(DestArraySize, Int32Ty);
+    }
+    SrcAlloca->setOperand(0, Builder.CreateBinaryIntrinsic(
+                                 Intrinsic::umax, SrcArraySize, DestArraySize));
+  }
+
   // Merge the two allocas.
   DestAlloca->replaceAllUsesWith(SrcAlloca);
   eraseInstruction(DestAlloca);
@@ -1716,7 +1788,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
   }
 
-  LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
+  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
   NumStackMove++;
   return true;
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll
index ff36bf0315311..5f193c851c732 100644
--- a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll
@@ -76,18 +76,15 @@ declare void @decompose(ptr nocapture)
 define void @test5(ptr %ptr) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EARLY_DATA:%.*]] = alloca [128 x i8], align 8
-; CHECK-NEXT:    [[TMP:%.*]] = alloca [[T:%.*]], align 8
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[EARLY_DATA]])
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i8, i32 8224, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    call fastcc void @decompose(ptr [[TMP]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[EARLY_DATA]], ptr [[TMP]], i64 32, i1 false)
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %early_data = alloca [128 x i8], align 8
   %tmp = alloca %t, align 8
-  call void @llvm.lifetime.start.p0(i64 32, ptr %early_data)
+  call void @llvm.lifetime.start.p0(i64 128, ptr %early_data)
   %0 = load i32, ptr %ptr, align 8
   call fastcc void @decompose(ptr %tmp)
   call void @llvm.memcpy.p0.p0.i64(ptr %early_data, ptr %tmp, i64 32, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 31e255b83eb9e..843e79e719194 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -1023,22 +1023,14 @@ bb2:
 }
 
 
-; Optimization failures follow:
-
 ; Tests that a memcpy that doesn't completely overwrite a stack value is a use
 ; for the purposes of liveness analysis, not a definition.
 define void @incomplete_memcpy() {
 ; CHECK-LABEL: define void @incomplete_memcpy() {
-; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 11, i1 false)
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[DEST]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
 ; CHECK-NEXT:    ret void
 ;
   %src = alloca %struct.Foo, align 4
@@ -1058,17 +1050,10 @@ define void @incomplete_memcpy() {
 ; for the purposes of liveness analysis, not a definition.
 define void @incomplete_store() {
 ; CHECK-LABEL: define void @incomplete_store() {
-; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[SRC]])
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4
-; CHECK-NEXT:    store i32 [[TMP2]], ptr [[DEST]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[DEST]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
 ; CHECK-NEXT:    ret void
 ;
   %src = alloca %struct.Foo, align 4
@@ -1085,20 +1070,86 @@ define void @incomplete_store() {
   ret void
 }
 
+; Tests merging allocas with different sizes
+define void @mismatched_alloca_size() {
+; CHECK-LABEL: define void @mismatched_alloca_size() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 24, align 4
+; CHECK-NEXT:    store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i8, i64 24, align 4
+  %dest = alloca i8, i64 12, align 4
+  call void @llvm.lifetime.start.p0(i64 24, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+
+  %2 = call i32 @use_nocapture(ptr nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 24, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; Tests merging allocas with different types
+define void @mismatched_alloca_type() {
+; CHECK-LABEL: define void @mismatched_alloca_type() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 6, align 4
+; CHECK-NEXT:    store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i16, i64 6, align 4
+  %dest = alloca i8, i64 12, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+
+  %2 = call i32 @use_nocapture(ptr nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; Tests merging allocas with different types and sizes
+define void @mismatched_alloca_type_size() {
+; CHECK-LABEL: define void @mismatched_alloca_type_size() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i32 24, align 4
+; CHECK-NEXT:    store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i16, i64 12, align 4
+  %dest = alloca i8, i64 12, align 4
+  call void @llvm.lifetime.start.p0(i64 24, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+
+  %2 = call i32 @use_nocapture(ptr nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 24, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
 ; Tests that dynamically-sized allocas are never merged.
 define void @dynamically_sized_alloca(i64 %i) {
 ; CHECK-LABEL: define void @dynamically_sized_alloca
 ; CHECK-SAME: (i64 [[I:%.*]]) {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 [[I]], align 4
-; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i64 [[I]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr captures(none) [[DEST]])
 ; CHECK-NEXT:    store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr captures(none) [[DEST]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
   %src = alloca i8, i64 %i, align 4
@@ -1117,6 +1168,8 @@ define void @dynamically_sized_alloca(i64 %i) {
 }
 
 
+; Optimization failures follow:
+
 ; Tests that inalloca attributed allocas are never merged, to prevent stacksave/stackrestore handling.
 define void @inalloca() {
 ; CHECK-LABEL: define void @inalloca() {
@@ -1178,36 +1231,6 @@ define void @dynamically_sized_memcpy(i64 %size) {
   ret void
 }
 
-; Tests that allocas with different sizes aren't merged together.
-define void @mismatched_alloca_size() {
-; CHECK-LABEL: define void @mismatched_alloca_size() {
-; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 24, align 4
-; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i64 12, align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT:    store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr captures(none) [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT:    ret void
-;
-  %src = alloca i8, i64 24, align 4
-  %dest = alloca i8, i64 12, align 4
-  call void @llvm.lifetime.start.p0(i64 24, ptr nocapture %src)
-  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
-  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
-  %1 = call i32 @use_nocapture(ptr nocapture %src)
-
-  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
-
-  %2 = call i32 @use_nocapture(ptr nocapture %dest)
-  call void @llvm.lifetime.end.p0(i64 24, ptr nocapture %src)
-  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
-  ret void
-}
-
 ; Tests that allocas with mismatched address spaces aren't combined.
 define void @mismatched_alloca_addrspace() {
 ; CHECK-LABEL: define void @mismatched_alloca_addrspace() {
diff --git a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
index 0fc37c44fa9e8..493ca3faabb61 100644
--- a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
@@ -16,12 +16,11 @@ target triple = "i686-unknown-windows-msvc19.14.26433"
 
 define i32 @test_norestore(i32 %n) {
 ; CHECK-LABEL: @test_norestore(
-; CHECK-NEXT:    [[TMPMEM:%.*]] = alloca [10 x i8], align 4
-; CHECK-NEXT:    [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4
+; CHECK-NEXT:    [[N:%.*]] = call i32 @llvm.umax.i32(i32 [[N1:%.*]], i32 10)
+; CHECK-NEXT:    [[P:%.*]] = alloca i8, i32 [[N]], align 4
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[P]], ptr align 1 @str, i32 9, i1 false)
 ; CHECK-NEXT:    [[P10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 9
 ; CHECK-NEXT:    store i8 0, ptr [[P10]], align 1
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[TMPMEM]], ptr [[P]], i32 10, i1 false)
 ; CHECK-NEXT:    call void @external()
 ; CHECK-NEXT:    [[HEAP:%.*]] = call ptr @malloc(i32 9)
 ; CHECK-NEXT:    call void ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/150792


More information about the llvm-commits mailing list