[llvm] 361464c - [MemCpyOpt] Use memcpy source directly if dest is known to be immutable from attributes
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 9 23:46:49 PDT 2023
Author: khei4
Date: 2023-06-10T15:46:32+09:00
New Revision: 361464c027239a70d66fb7790032b23696d5b303
URL: https://github.com/llvm/llvm-project/commit/361464c027239a70d66fb7790032b23696d5b303
DIFF: https://github.com/llvm/llvm-project/commit/361464c027239a70d66fb7790032b23696d5b303.diff
LOG: [MemCpyOpt] Use memcpy source directly if dest is known to be immutable from attributes
Differential Revision: https://reviews.llvm.org/D150970
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Transforms/MemCpyOpt/memcpy.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 7c3b8ba3086ee..9ce64623e25b2 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -73,6 +73,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
BatchAAResults &BAA);
bool processByValArgument(CallBase &CB, unsigned ArgNo);
+ bool processImmutArgument(CallBase &CB, unsigned ArgNo);
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2f04cf35933c0..00937e0d734ab 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1604,6 +1604,101 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
return true;
}
+/// This is called on memcpy dest pointer arguments attributed as immutable
+/// during the call. Try to use the memcpy source directly if all of the
+/// following conditions are satisfied.
+/// 1. The memcpy dst is neither modified during the call nor captured by the
+/// call (readonly, noalias and nocapture attributes on the call site).
+/// 2. The memcpy dst is an alloca with known alignment & size.
+/// 2-1. The memcpy length == the alloca size, which ensures that the new
+/// pointer is dereferenceable for the required range.
+/// 2-2. The src pointer has alignment >= the alloca alignment or can be
+/// enforced so.
+/// 3. The memcpy dst and src are not modified between the memcpy and the call.
+/// (if MSSA clobber check is safe.)
+/// 4. The memcpy src is not modified during the call. (ModRef check shows no
+/// Mod.)
+bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
+ // 1. Ensure passed argument is immutable during call.
+ if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
+ CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
+ return false;
+ const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+ Value *ImmutArg = CB.getArgOperand(ArgNo);
+
+ // 2. Check that arg is an alloca.
+ // TODO: Even if the arg gets back to branches, we can remove memcpy if all
+ // the alloca alignments can be enforced to source alignment.
+ auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
+ if (!AI)
+ return false;
+
+ std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
+ // Can't handle unknown size alloca.
+ // (e.g. Variable Length Array, Scalable Vector)
+ if (!AllocaSize || AllocaSize->isScalable())
+ return false;
+ MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ if (!CallAccess)
+ return false; // The call has no MemorySSA access to start the walk from.
+
+ MemCpyInst *MDep = nullptr;
+ BatchAAResults BAA(*AA);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc, BAA);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+
+ // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, check that it is non-volatile and that the alloca is its dest.
+ if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
+ return false;
+
+ // The address space of the memcpy source must match the immut argument's.
+ if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+ ImmutArg->getType()->getPointerAddressSpace())
+ return false;
+
+ // 2-1. The length of the memcpy must be equal to the size of the alloca.
+ auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ if (!MDepLen || AllocaSize != MDepLen->getValue())
+ return false;
+
+ // 2-2. The memcpy source align must be larger than or equal to the alloca's
+ // align. If not so, we check to see if we can force the source of the memcpy
+ // to the alignment we need. If we fail, we bail out.
+ Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
+ Align AllocaAlign = AI->getAlign();
+ if (MemDepAlign < AllocaAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
+ DT) < AllocaAlign)
+ return false;
+
+ // 3. Verify that the source doesn't change in between the memcpy and
+ // the call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the call into foo(*b).
+ if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), CallAccess))
+ return false;
+
+ // 4. The memcpy src must not be modified during the call.
+ if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
+ << " " << *MDep << "\n"
+ << " " << CB << "\n");
+
+ // Otherwise we're good! Update the immut argument.
+ CB.setArgOperand(ArgNo, MDep->getSource());
+ ++NumMemCpyInstr;
+ return true;
+}
+
/// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) {
bool MadeChange = false;
@@ -1632,9 +1727,12 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
else if (auto *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
else if (auto *CB = dyn_cast<CallBase>(I)) {
- for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+ for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
if (CB->isByValArgument(i))
MadeChange |= processByValArgument(*CB, i);
+ else if (CB->onlyReadsMemory(i))
+ MadeChange |= processImmutArgument(*CB, i);
+ }
}
// Reprocess the instruction if desired.
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 9108402884d14..412ffd5d53e0e 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -395,13 +395,9 @@ declare void @f2(ptr)
declare void @f(ptr)
declare void @f_full_readonly(ptr nocapture noalias readonly)
-; TODO: Remove memcpy, which is guaranteed to be invariant
-; before and after the call because of its attributes.
define void @immut_param(ptr align 4 noalias %val) {
; CHECK-LABEL: @immut_param(
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
-; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL1]])
+; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL:%.*]])
; CHECK-NEXT: ret void
;
%val1 = alloca i8, align 4
@@ -452,12 +448,9 @@ define void @immut_param_maywrite(ptr align 4 noalias %val) {
ret void
}
-; TODO: Remove memcpy
define void @immut_param_readonly(ptr align 4 noalias %val) {
; CHECK-LABEL: @immut_param_readonly(
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
-; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL1]])
+; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL:%.*]])
; CHECK-NEXT: ret void
;
%val1 = alloca i8, align 4
@@ -466,12 +459,9 @@ define void @immut_param_readonly(ptr align 4 noalias %val) {
ret void
}
-; TODO: Remove memcpy
define void @immut_param_no_align(ptr align 4 noalias %val) {
; CHECK-LABEL: @immut_param_no_align(
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
-; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]])
+; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]])
; CHECK-NEXT: ret void
;
%val1 = alloca i8, align 4
@@ -580,12 +570,9 @@ define void @immut_param_different_addrespace(ptr addrspace(1) align 4 noalias %
ret void
}
-; TODO: remove memcpy
define void @immut_param_bigger_align(ptr align 16 noalias %val) {
; CHECK-LABEL: @immut_param_bigger_align(
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr [[VAL:%.*]], i64 1, i1 false)
-; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]])
+; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]])
; CHECK-NEXT: ret void
;
%val1 = alloca i8, align 4
@@ -608,14 +595,11 @@ define void @immut_param_smaller_align(ptr align 4 noalias %val) {
ret void
}
-; TODO: remove memcpy.
define void @immut_param_enforced_alignment() {
; CHECK-LABEL: @immut_param_enforced_alignment(
-; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4
; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[VAL1]], ptr [[VAL]], i64 1, i1 false)
-; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]])
+; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL]])
; CHECK-NEXT: ret void
;
%val = alloca i8, align 1
@@ -659,14 +643,11 @@ define void @immut_but_alias_src(ptr %val) {
ret void
}
-; TODO: remove memcpy
define void @immut_unescaped_alloca() {
; CHECK-LABEL: @immut_unescaped_alloca(
; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4
; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL]], i64 1, i1 false)
-; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL1]])
+; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL]])
; CHECK-NEXT: ret void
;
%val = alloca i8, align 4
More information about the llvm-commits
mailing list