[llvm-commits] [llvm] r41479 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2004-02-12-Memcpy.llx
Rafael Espindola
rafael.espindola at gmail.com
Mon Aug 27 03:18:21 PDT 2007
Author: rafael
Date: Mon Aug 27 05:18:20 2007
New Revision: 41479
URL: http://llvm.org/viewvc/llvm-project?rev=41479&view=rev
Log:
Call libc memcpy/memset if the array size is bigger than the threshold.
Copying a 100MB array (after a warmup) shows that the glibc 2.6.1 implementation on
x86-64 (Core 2) is 30% faster (from 0.270917s to 0.188079s).
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=41479&r1=41478&r2=41479&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Aug 27 05:18:20 2007
@@ -3753,10 +3753,10 @@
if (Align == 0) Align = 1;
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned, call memset if size is less than the threshold.
+ // If not DWORD aligned or size is more than the threshold, call memset.
// It knows how to align to the right boundary first.
if ((Align & 3) != 0 ||
- (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
@@ -3909,10 +3909,10 @@
if (Align == 0) Align = 1;
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned, call memcpy if size is less than the threshold.
+ // If not DWORD aligned or size is more than the threshold, call memcpy.
// It knows how to align to the right boundary first.
if ((Align & 3) != 0 ||
- (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Modified: llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx?rev=41479&r1=41478&r2=41479&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx (original)
+++ llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx Mon Aug 27 05:18:20 2007
@@ -1,24 +1,26 @@
-; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 | grep movs
-declare void %llvm.memcpy.i32(sbyte* %A, sbyte* %B, uint %amt, uint %align)
+; RUN: llvm-as < %s | llc -march=x86 | grep movs | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep memcpy | count 2
-%A = global [1000 x int] zeroinitializer
-%B = global [1000 x int] zeroinitializer
+@A = global [32 x i32] zeroinitializer
+@B = global [32 x i32] zeroinitializer
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-void %main() {
+define void @main() {
; dword copy
- call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
- sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
- uint 4000, uint 4)
+ call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
+ i8* bitcast ([32 x i32]* @B to i8*),
+ i32 128, i32 4 )
; word copy
- call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
- sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
- uint 4000, uint 2)
+ call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
+ i8* bitcast ([32 x i32]* @B to i8*),
+ i32 128, i32 2 )
; byte copy
- call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
- sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
- uint 4000, uint 1)
+ call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
+ i8* bitcast ([32 x i32]* @B to i8*),
+ i32 128, i32 1 )
+
ret void
}
More information about the llvm-commits
mailing list