[llvm-commits] [llvm] r116637 - in /llvm/trunk: lib/Transforms/Scalar/MemCpyOptimizer.cpp test/Transforms/MemCpyOpt/loadstore-sret.ll
Owen Anderson
resistor at mac.com
Fri Oct 15 15:52:12 PDT 2010
Author: resistor
Date: Fri Oct 15 17:52:12 2010
New Revision: 116637
URL: http://llvm.org/viewvc/llvm-project?rev=116637&view=rev
Log:
Generalize MemCpyOpt's handling of call slot forwarding so that it functions properly
when the forwarding is implemented with a load/store pair rather than a memcpy.
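As a minimal IR sketch of the pattern this change recognizes (hypothetical
names, mirroring the test added below): the callee writes its sret result
into a local temporary, and the caller forwards it to the final destination
with a load/store pair instead of a memcpy.

  %T = type { i32* }

  define void @caller(%T* noalias sret %dest) {
    ; Before: @producer fills a local temporary, and the caller then
    ; forwards the result to %dest with a load/store pair.
    %tmp = alloca %T, align 8
    call void @producer(%T* sret %tmp)
    %src = getelementptr inbounds %T* %tmp, i64 0, i32 0
    %v = load i32** %src, align 8
    %dst = getelementptr inbounds %T* %dest, i64 0, i32 0
    store i32* %v, i32** %dst, align 8
    ret void
  }

  declare void @producer(%T* sret)

  ; After the transform, the call writes into %dest directly and the
  ; load/store pair is erased:
  ;   call void @producer(%T* sret %dest)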
Added:
llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=116637&r1=116636&r2=116637&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Fri Oct 15 17:52:12 2010
@@ -321,7 +321,8 @@
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C);
bool iterateOnFunction(Function &F);
};
@@ -339,7 +340,6 @@
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones
@@ -347,6 +347,37 @@
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (SI->isVolatile()) return false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse()) {
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ MemDepResult dep = MD.getDependency(LI);
+ CallInst *C = 0;
+ if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+ C = dyn_cast<CallInst>(dep.getInst());
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD.removeInstruction(SI);
+ SI->eraseFromParent();
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
LLVMContext &Context = SI->getContext();
// There are two cases that are interesting for this code to handle: memcpy
@@ -359,8 +390,6 @@
if (!ByteVal)
return false;
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
Module *M = SI->getParent()->getParent()->getParent();
@@ -494,7 +523,9 @@
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -511,16 +542,8 @@
// Deliberately get the source and destination with bitcasts stripped away,
// because we'll need to do type comparisons based on the underlying type.
- Value *cpyDest = cpy->getDest();
- Value *cpySrc = cpy->getSource();
CallSite CS(C);
- // We need to be able to reason about the size of the memcpy, so we require
- // that it be a constant.
- ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
- if (!cpyLength)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
@@ -537,7 +560,7 @@
uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLength->getZExtValue() < srcSize)
+ if (cpyLen < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
@@ -606,7 +629,7 @@
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+ if (AA.getModRefInfo(C, cpyDest, srcSize) !=
AliasAnalysis::NoModRef)
return false;
@@ -635,7 +658,6 @@
// Remove the memcpy
MD.removeInstruction(cpy);
- cpy->eraseFromParent();
++NumMemCpyInstr;
return true;
@@ -649,6 +671,10 @@
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+ // We can only optimize statically sized memcpy calls.
+ ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!cpyLen) return false;
+
// There are two possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundancy for DSE.
// b) call-memcpy xform for return slot optimization.
@@ -656,8 +682,12 @@
if (!dep.isClobber())
return false;
if (!isa<MemCpyInst>(dep.getInst())) {
- if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
- return performCallSlotOptzn(M, C);
+ if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
+ bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ cpyLen->getZExtValue(), C);
+ if (changed) M->eraseFromParent();
+ return changed;
+ }
return false;
}
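For contrast, a sketch of the memcpy-shaped form of the same pattern, which
processMemCpy still routes through performCallSlotOptzn (hypothetical names,
and assuming the five-operand memcpy intrinsic of this era; note the length
must now be a ConstantInt, checked up front in processMemCpy):

  %tmp = alloca %T, align 8
  call void @producer(%T* sret %tmp)
  %src.i8 = bitcast %T* %tmp to i8*
  %dst.i8 = bitcast %T* %dest to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst.i8, i8* %src.i8,
                                       i64 8, i32 8, i1 false)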
Added: llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll?rev=116637&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll Fri Oct 15 17:52:12 2010
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -memcpyopt | FileCheck %s
+; <rdar://problem/8536696>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%"class.std::auto_ptr" = type { i32* }
+
+; CHECK: @_Z3foov
+define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp {
+_ZNSt8auto_ptrIiED1Ev.exit:
+ %temp.lvalue = alloca %"class.std::auto_ptr", align 8
+; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result)
+ call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
+ %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
+; CHECK-NOT: load
+ %tmp2.i.i = load i32** %tmp.i.i, align 8
+ %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
+; CHECK-NOT: store
+ store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8
+; CHECK: ret void
+ ret void
+}
+
+declare void @_Z3barv(%"class.std::auto_ptr"* sret)
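For reference, the function body the CHECK lines pin down once -memcpyopt has
run looks roughly like this (a sketch; dead instructions such as the alloca
and the GEPs may linger for later passes to clean up):

  define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp {
  _ZNSt8auto_ptrIiED1Ev.exit:
    %temp.lvalue = alloca %"class.std::auto_ptr", align 8
    call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result)
    ret void
  }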