[llvm-commits] [llvm] r66366 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll test/Transforms/ScalarRepl/vector_memcpy.ll
Chris Lattner
sabre at nondot.org
Sat Mar 7 20:04:22 PST 2009
Author: lattner
Date: Sat Mar 7 22:04:21 2009
New Revision: 66366
URL: http://llvm.org/viewvc/llvm-project?rev=66366&view=rev
Log:
Enhance SROA to "promote to scalar" allocas which are
memcpy/memmove'd into or out of. This fixes a serious
perf issue that Nate ran into.
Added:
llvm/trunk/test/Transforms/ScalarRepl/vector_memcpy.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=66366&r1=66365&r2=66366&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Sat Mar 7 22:04:21 2009
@@ -1356,6 +1356,16 @@
continue;
}
}
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
+ if (Len->getZExtValue() == AllocaSize && Offset == 0) {
+ IsNotTrivial = true;
+ continue;
+ }
+ }
// Ignore dbg intrinsic.
if (isa<DbgInfoIntrinsic>(User))
@@ -1440,6 +1450,44 @@
MSI->eraseFromParent();
continue;
}
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+
+ // If the source and destination are both to the same alloca, then this is
+ // a noop copy-to-self, just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
+
+ if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+
+ MTI->eraseFromParent();
+ continue;
+ }
// If user is a dbg info intrinsic then it is safe to remove it.
if (isa<DbgInfoIntrinsic>(User)) {
Modified: llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll?rev=66366&r1=66365&r2=66366&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll (original)
+++ llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll Sat Mar 7 22:04:21 2009
@@ -6,12 +6,11 @@
define void @memtest1(i8* %dst, i8* %src) nounwind {
entry:
- %temp = alloca [100 x i8] ; <[100 x i8]*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %temp1 = bitcast [100 x i8]* %temp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 100, i32 1 )
- %temp3 = bitcast [100 x i8]* %temp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 100, i32 1 )
+ %temp = alloca [200 x i8] ; <[100 x i8]*> [#uses=2]
+ %temp1 = bitcast [200 x i8]* %temp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 200, i32 1 )
+ %temp3 = bitcast [200 x i8]* %temp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 200, i32 1 )
ret void
}
Added: llvm/trunk/test/Transforms/ScalarRepl/vector_memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/vector_memcpy.ll?rev=66366&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/vector_memcpy.ll (added)
+++ llvm/trunk/test/Transforms/ScalarRepl/vector_memcpy.ll Sat Mar 7 22:04:21 2009
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret <16 x float> %A}
+define <16 x float> @foo(<16 x float> %A) nounwind {
+ %tmp = alloca <16 x float>, align 16
+ %tmp2 = alloca <16 x float>, align 16
+ store <16 x float> %A, <16 x float>* %tmp
+ %s = bitcast <16 x float>* %tmp to i8*
+ %s2 = bitcast <16 x float>* %tmp2 to i8*
+ call void @llvm.memcpy.i64(i8* %s2, i8* %s, i64 64, i32 16)
+
+ %R = load <16 x float>* %tmp2
+ ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
More information about the llvm-commits mailing list