[llvm-commits] [llvm] r119694 - in /llvm/trunk: lib/Transforms/Scalar/MemCpyOptimizer.cpp test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll test/Transforms/MemCpyOpt/memcpy.ll
Chris Lattner
sabre at nondot.org
Thu Nov 18 00:00:57 PST 2010
Author: lattner
Date: Thu Nov 18 02:00:57 2010
New Revision: 119694
URL: http://llvm.org/viewvc/llvm-project?rev=119694&view=rev
Log:
remove a pointless restriction from memcpyopt. It was
refusing to optimize two memcpy's like this:
copy A <- B
copy C <- A
if it couldn't prove that noalias(B,C). We can eliminate
the copy by producing a memmove instead of memcpy.
Removed:
llvm/trunk/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll
Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=119694&r1=119693&r2=119694&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Thu Nov 18 02:00:57 2010
@@ -688,11 +688,14 @@
if (DepSize < MSize)
return false;
- // Finally, we have to make sure that the dest of the second does not
- // alias the source of the first.
+ Intrinsic::ID ResultFn = Intrinsic::memcpy;
+
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize))
- return false;
+ ResultFn = Intrinsic::memmove;
// If all checks passed, then we can transform these memcpy's
const Type *ArgTys[3] = {
@@ -702,7 +705,7 @@
};
Function *MemCpyFun =
Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(),
- M->getIntrinsicID(), ArgTys, 3);
+ ResultFn, ArgTys, 3);
// Make sure to use the lesser of the alignment of the source and the dest
// since we're changing where we're reading from, but don't want to increase
Removed: llvm/trunk/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll?rev=119693&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll (removed)
@@ -1,17 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | grep {call.*memcpy.*agg.result}
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-@x = external global { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-
-define void @foo({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
-entry:
- %x.0 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
- %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8* ; <i8*> [#uses=2]
- call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
- %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
- ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
Modified: llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll?rev=119694&r1=119693&r2=119694&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll Thu Nov 18 02:00:57 2010
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*memcpy} | count 1
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
@@ -20,7 +20,7 @@
; CHECK: @test1
; CHECK: call void @ccoshl
-; CHECK: call @llvm.memcpy
+; CHECK: call void @llvm.memcpy
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
ret void
@@ -29,3 +29,36 @@
declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+
+
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be replaced with a memmove.
+define void @test2(i8* %P, i8* %Q) nounwind {
+ %memtmp = alloca { x86_fp80, x86_fp80 }, align 16
+ %R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
+ call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
+ call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
+ ret void
+
+; CHECK: @test2
+; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
+; CHECK-NEXT: ret void
+}
+
+
+
+
+@x = external global { x86_fp80, x86_fp80 }
+
+define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+ %x.0 = alloca { x86_fp80, x86_fp80 }
+ %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
+ call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
+ %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
+ call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
+ ret void
+; CHECK: @test3
+; CHECK-NEXT: %agg.result2 = bitcast
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret void
+}
More information about the llvm-commits
mailing list