[llvm] [MemCpyOpt] allow more memcpy-to-memcpy optimization (PR #150792)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 26 12:13:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Jameson Nash (vtjnash)
<details>
<summary>Changes</summary>
Allow the memcpy-to-memcpy optimization even when the sizes are not identical. For example, the copy might have been generated as a small slice of a larger struct (currently only supported at offset zero), or it might store to only part of an oversized alloca.
---
Patch is 20.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150792.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (+97-25)
- (modified) llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll (+2-5)
- (modified) llvm/test/Transforms/MemCpyOpt/stack-move.ll (+81-58)
- (modified) llvm/test/Transforms/MemCpyOpt/stackrestore.ll (+2-3)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9220abb974d21..a8273321b6deb 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -1395,8 +1396,10 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+ if (LTSize->getZExtValue() == (uint64_t)-1)
+ return true;
- if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
+ if (auto *CSize = dyn_cast_or_null<ConstantInt>(Size)) {
if (AA.isMustAlias(V, II->getArgOperand(1)) &&
LTSize->getZExtValue() >= CSize->getZExtValue())
return true;
@@ -1439,6 +1442,30 @@ static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
return false;
}
+// If only the MemSrc instruction is known, a similar but slightly weaker
+// analysis can apply
+static bool allOverreadUndefContents(MemorySSA *MSSA, Instruction *Store,
+ BatchAAResults &BAA) {
+ MemoryLocation Loc;
+ Value *Ptr;
+ if (auto SI = dyn_cast<StoreInst>(Store)) {
+ Loc = MemoryLocation::get(SI);
+ Ptr = SI->getPointerOperand();
+ } else if (auto MI = dyn_cast<MemCpyInst>(Store)) {
+ Loc = MemoryLocation::getForDest(MI);
+ Ptr = MI->getDest();
+ } else {
+ llvm_unreachable("performStackMoveOptzn must have a known store kind");
+ }
+ MemoryUseOrDef *MemAccess = MSSA->getMemoryAccess(Store);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemAccess->getDefiningAccess(), Loc, BAA);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContents(MSSA, BAA, Ptr, MD, nullptr))
+ return true;
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1532,21 +1559,43 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return false;
}
- // Check that copy is full with static size.
- const DataLayout &DL = DestAlloca->getDataLayout();
- std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
- if (!SrcSize || Size != *SrcSize) {
- LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
- return false;
- }
- std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
- if (!DestSize || Size != *DestSize) {
- LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+ if (SrcAlloca->isUsedWithInAlloca() || DestAlloca->isUsedWithInAlloca())
return false;
- }
- if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
- return false;
+ Type *SrcType = SrcAlloca->getAllocatedType();
+ Type *DestType = DestAlloca->getAllocatedType();
+ // If they don't have common type, then they will need to be converted to a
+ // common size at runtime
+ const auto &DL = SrcAlloca->getDataLayout();
+ TypeSize SrcSize = DL.getTypeAllocSize(SrcType);
+ TypeSize DestSize = DL.getTypeAllocSize(DestType);
+ if (SrcType != DestType)
+ if (SrcSize != DestSize)
+ if (!SrcSize.isFixed() || !DestSize.isFixed())
+ return false;
+
+ // Check that copy is full with dest size, either because it wrote every byte,
+ // or it was fresh.
+ std::optional<TypeSize> FullSize = DestAlloca->getAllocationSize(DL);
+ if (!FullSize || Size != *FullSize)
+ if (!allOverreadUndefContents(MSSA, Store, BAA)) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+ return false;
+ }
+
+ // Check if it will be legal to combine allocas without breaking dominator.
+ // TODO: Try to hoist the arguments (recursively) instead of giving up
+ // immediately.
+ bool MoveSrc = !DT->dominates(SrcAlloca, DestAlloca);
+ if (MoveSrc) {
+ if (!DT->dominates(DestAlloca, SrcAlloca))
+ return false;
+ if (!DT->dominates(SrcAlloca->getArraySize(), DestAlloca))
+ return false;
+ } else {
+ if (!DT->dominates(DestAlloca->getArraySize(), SrcAlloca))
+ return false;
+ }
// Check that src and dest are never captured, unescaped allocas. Also
// find the nearest common dominator and postdominator for all users in
@@ -1555,7 +1604,6 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
SmallVector<Instruction *, 4> LifetimeMarkers;
SmallSet<Instruction *, 4> AAMetadataInstrs;
- bool SrcNotDom = false;
auto CaptureTrackingWithModRef =
[&](Instruction *AI,
@@ -1569,10 +1617,6 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
Instruction *I = Worklist.pop_back_val();
for (const Use &U : I->uses()) {
auto *UI = cast<Instruction>(U.getUser());
- // If any use that isn't dominated by SrcAlloca exists, we move src
- // alloca to the entry before the transformation.
- if (!DT->dominates(SrcAlloca, UI))
- SrcNotDom = true;
if (Visited.size() >= MaxUsesToExplore) {
LLVM_DEBUG(
@@ -1680,15 +1724,43 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
return false;
- // We can do the transformation. First, move the SrcAlloca to the start of the
- // BB.
- if (SrcNotDom)
- SrcAlloca->moveBefore(*SrcAlloca->getParent(),
- SrcAlloca->getParent()->getFirstInsertionPt());
+ // We can now do the transformation. First move the Src if it was after Dest.
+ if (MoveSrc)
+ SrcAlloca->moveBefore(DestAlloca->getIterator());
+
// Align the allocas appropriately.
SrcAlloca->setAlignment(
std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
+ // Size the allocas appropriately.
+ Value *SrcArraySize = SrcAlloca->getArraySize();
+ Value *DestArraySize = DestAlloca->getArraySize();
+ IRBuilder<InstSimplifyFolder> Builder(SrcAlloca->getContext(),
+ InstSimplifyFolder(DL));
+ Builder.SetInsertPoint(SrcAlloca);
+ Type *Int32Ty = Builder.getInt32Ty();
+ if (SrcType != DestType && SrcSize != DestSize) {
+ SrcAlloca->setAllocatedType(Type::getInt8Ty(Load->getContext()));
+ if (SrcArraySize->getType() != Int32Ty)
+ SrcArraySize = Builder.CreateZExtOrTrunc(SrcArraySize, Int32Ty);
+ if (DestArraySize->getType() != Int32Ty)
+ DestArraySize = Builder.CreateZExtOrTrunc(DestArraySize, Int32Ty);
+ SrcArraySize = Builder.CreateMul(
+ SrcArraySize, ConstantInt::get(Int32Ty, SrcSize.getFixedValue()), "",
+ true, true);
+ DestArraySize = Builder.CreateMul(
+ DestArraySize, ConstantInt::get(Int32Ty, DestSize.getFixedValue()), "",
+ true, true);
+ }
+ if (SrcArraySize != DestArraySize) {
+ if (SrcArraySize->getType() != DestArraySize->getType()) {
+ SrcArraySize = Builder.CreateZExtOrTrunc(SrcArraySize, Int32Ty);
+ DestArraySize = Builder.CreateZExtOrTrunc(DestArraySize, Int32Ty);
+ }
+ SrcAlloca->setOperand(0, Builder.CreateBinaryIntrinsic(
+ Intrinsic::umax, SrcArraySize, DestArraySize));
+ }
+
// Merge the two allocas.
DestAlloca->replaceAllUsesWith(SrcAlloca);
eraseInstruction(DestAlloca);
@@ -1716,7 +1788,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
}
- LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
+ LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
NumStackMove++;
return true;
}
diff --git a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll
index ff36bf0315311..5f193c851c732 100644
--- a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll
@@ -76,18 +76,15 @@ declare void @decompose(ptr nocapture)
define void @test5(ptr %ptr) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[EARLY_DATA:%.*]] = alloca [128 x i8], align 8
-; CHECK-NEXT: [[TMP:%.*]] = alloca [[T:%.*]], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[EARLY_DATA]])
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i32 8224, align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: call fastcc void @decompose(ptr [[TMP]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[EARLY_DATA]], ptr [[TMP]], i64 32, i1 false)
; CHECK-NEXT: ret void
;
entry:
%early_data = alloca [128 x i8], align 8
%tmp = alloca %t, align 8
- call void @llvm.lifetime.start.p0(i64 32, ptr %early_data)
+ call void @llvm.lifetime.start.p0(i64 128, ptr %early_data)
%0 = load i32, ptr %ptr, align 8
call fastcc void @decompose(ptr %tmp)
call void @llvm.memcpy.p0.p0.i64(ptr %early_data, ptr %tmp, i64 32, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 31e255b83eb9e..843e79e719194 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -1023,22 +1023,14 @@ bb2:
}
-; Optimization failures follow:
-
; Tests that a memcpy that doesn't completely overwrite a stack value is a use
; for the purposes of liveness analysis, not a definition.
define void @incomplete_memcpy() {
; CHECK-LABEL: define void @incomplete_memcpy() {
-; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 11, i1 false)
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[DEST]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -1058,17 +1050,10 @@ define void @incomplete_memcpy() {
; for the purposes of liveness analysis, not a definition.
define void @incomplete_store() {
; CHECK-LABEL: define void @incomplete_store() {
-; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[SRC]])
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4
-; CHECK-NEXT: store i32 [[TMP2]], ptr [[DEST]], align 4
+; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[DEST]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr noundef captures(none) [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 4
@@ -1085,20 +1070,86 @@ define void @incomplete_store() {
ret void
}
+; Tests merging allocas with different sizes
+define void @mismatched_alloca_size() {
+; CHECK-LABEL: define void @mismatched_alloca_size() {
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 24, align 4
+; CHECK-NEXT: store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca i8, i64 24, align 4
+ %dest = alloca i8, i64 12, align 4
+ call void @llvm.lifetime.start.p0(i64 24, ptr nocapture %src)
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+
+ %2 = call i32 @use_nocapture(ptr nocapture %dest)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nocapture %src)
+ call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+ ret void
+}
+
+; Tests merging allocas with different types
+define void @mismatched_alloca_type() {
+; CHECK-LABEL: define void @mismatched_alloca_type() {
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 6, align 4
+; CHECK-NEXT: store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca i16, i64 6, align 4
+ %dest = alloca i8, i64 12, align 4
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+
+ %2 = call i32 @use_nocapture(ptr nocapture %dest)
+ call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+ call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+ ret void
+}
+
+; Tests merging allocas with different types and sizes
+define void @mismatched_alloca_type_size() {
+; CHECK-LABEL: define void @mismatched_alloca_type_size() {
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i32 24, align 4
+; CHECK-NEXT: store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
+; CHECK-NEXT: ret void
+;
+ %src = alloca i16, i64 12, align 4
+ %dest = alloca i8, i64 12, align 4
+ call void @llvm.lifetime.start.p0(i64 24, ptr nocapture %src)
+ call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+ store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+ %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+
+ %2 = call i32 @use_nocapture(ptr nocapture %dest)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nocapture %src)
+ call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+ ret void
+}
+
; Tests that dynamically-sized allocas are never merged.
define void @dynamically_sized_alloca(i64 %i) {
; CHECK-LABEL: define void @dynamically_sized_alloca
; CHECK-SAME: (i64 [[I:%.*]]) {
; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 [[I]], align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 [[I]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr captures(none) [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr captures(none) [[DEST]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
; CHECK-NEXT: ret void
;
%src = alloca i8, i64 %i, align 4
@@ -1117,6 +1168,8 @@ define void @dynamically_sized_alloca(i64 %i) {
}
+; Optimization failures follow:
+
; Tests that inalloca attributed allocas are never merged, to prevent stacksave/stackrestore handling.
define void @inalloca() {
; CHECK-LABEL: define void @inalloca() {
@@ -1178,36 +1231,6 @@ define void @dynamically_sized_memcpy(i64 %size) {
ret void
}
-; Tests that allocas with different sizes aren't merged together.
-define void @mismatched_alloca_size() {
-; CHECK-LABEL: define void @mismatched_alloca_size() {
-; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 24, align 4
-; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 12, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT: store [[STRUCT_FOO:%.*]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr captures(none) [[SRC]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
-; CHECK-NEXT: ret void
-;
- %src = alloca i8, i64 24, align 4
- %dest = alloca i8, i64 12, align 4
- call void @llvm.lifetime.start.p0(i64 24, ptr nocapture %src)
- call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
- store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
- %1 = call i32 @use_nocapture(ptr nocapture %src)
-
- call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
-
- %2 = call i32 @use_nocapture(ptr nocapture %dest)
- call void @llvm.lifetime.end.p0(i64 24, ptr nocapture %src)
- call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
- ret void
-}
-
; Tests that allocas with mismatched address spaces aren't combined.
define void @mismatched_alloca_addrspace() {
; CHECK-LABEL: define void @mismatched_alloca_addrspace() {
diff --git a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
index 0fc37c44fa9e8..493ca3faabb61 100644
--- a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
@@ -16,12 +16,11 @@ target triple = "i686-unknown-windows-msvc19.14.26433"
define i32 @test_norestore(i32 %n) {
; CHECK-LABEL: @test_norestore(
-; CHECK-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4
-; CHECK-NEXT: [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4
+; CHECK-NEXT: [[N:%.*]] = call i32 @llvm.umax.i32(i32 [[N1:%.*]], i32 10)
+; CHECK-NEXT: [[P:%.*]] = alloca i8, i32 [[N]], align 4
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[P]], ptr align 1 @str, i32 9, i1 false)
; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 9
; CHECK-NEXT: store i8 0, ptr [[P10]], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[TMPMEM]], ptr [[P]], i32 10, i1 false)
; CHECK-NEXT: call void @external()
; CHECK-NEXT: [[HEAP:%.*]] = call ptr @malloc(i32 9)
; CHECK-NEXT: call void ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150792
More information about the llvm-commits mailing list