[llvm-commits] [llvm] r41479 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2004-02-12-Memcpy.llx
Tanya Lattner
lattner at apple.com
Mon Aug 27 13:34:16 PDT 2007
Rafael,
test/CodeGen/X86/2004-02-12-Memcpy.llx is failing on x86 Darwin.
Here is the output in case it helps:
lattner% llvm-as < test/CodeGen/X86/2004-02-12-Memcpy.llx | llc -march=x86
.text
.align 4,0x90
.globl _main
_main:
pushl %edi
pushl %esi
subl $20, %esp
fnstcw 18(%esp)
movb $2, 19(%esp)
fldcw 18(%esp)
movl $_B, %esi
movl $_A, %edi
movl $32, %ecx
rep;movsl
movl $128, 8(%esp)
movl $_B, 4(%esp)
movl $_A, (%esp)
call L_memcpy$stub
movl $128, 8(%esp)
movl $_B, 4(%esp)
movl $_A, (%esp)
call L_memcpy$stub
addl $20, %esp
popl %esi
popl %edi
ret
.globl _A
.zerofill __DATA__, __common, _A, 128, 2
.globl _B
.zerofill __DATA__, __common, _B, 128, 2
.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_memcpy$stub:
.indirect_symbol _memcpy
hlt ; hlt ; hlt ; hlt ; hlt
.subsections_via_symbols
-Tanya
On Aug 27, 2007, at 3:18 AM, Rafael Espindola wrote:
> Author: rafael
> Date: Mon Aug 27 05:18:20 2007
> New Revision: 41479
>
> URL: http://llvm.org/viewvc/llvm-project?rev=41479&view=rev
> Log:
> call libc memcpy/memset if array size is bigger than the threshold.
>
> Copying a 100MB array (after a warmup) shows that the glibc 2.6.1
> implementation on x86-64 (Core 2) is 30% faster (from 0.270917s to
> 0.188079s).
>
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/
> X86ISelLowering.cpp?rev=41479&r1=41478&r2=41479&view=diff
>
> ======================================================================
> ========
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Aug 27
> 05:18:20 2007
> @@ -3753,10 +3753,10 @@
> if (Align == 0) Align = 1;
>
> ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
> - // If not DWORD aligned, call memset if size is less than the
> threshold.
> + // If not DWORD aligned or size is more than the threshold, call
> memset.
> // It knows how to align to the right boundary first.
> if ((Align & 3) != 0 ||
> - (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold
> ())) {
> + (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold
> ())) {
> MVT::ValueType IntPtr = getPointerTy();
> const Type *IntPtrTy = getTargetData()->getIntPtrType();
> TargetLowering::ArgListTy Args;
> @@ -3909,10 +3909,10 @@
> if (Align == 0) Align = 1;
>
> ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
> - // If not DWORD aligned, call memcpy if size is less than the
> threshold.
> + // If not DWORD aligned or size is more than the threshold, call
> memcpy.
> // It knows how to align to the right boundary first.
> if ((Align & 3) != 0 ||
> - (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold
> ())) {
> + (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold
> ())) {
> MVT::ValueType IntPtr = getPointerTy();
> TargetLowering::ArgListTy Args;
> TargetLowering::ArgListEntry Entry;
>
> Modified: llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
> X86/2004-02-12-Memcpy.llx?rev=41479&r1=41478&r2=41479&view=diff
>
> ======================================================================
> ========
> --- llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx (original)
> +++ llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx Mon Aug 27
> 05:18:20 2007
> @@ -1,24 +1,26 @@
> -; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 | grep movs
> -declare void %llvm.memcpy.i32(sbyte* %A, sbyte* %B, uint %amt,
> uint %align)
> +; RUN: llvm-as < %s | llc -march=x86 | grep movs | count 1
> +; RUN: llvm-as < %s | llc -march=x86 | grep memcpy | count 2
>
> -%A = global [1000 x int] zeroinitializer
> -%B = global [1000 x int] zeroinitializer
> +@A = global [32 x i32] zeroinitializer
> +@B = global [32 x i32] zeroinitializer
>
> +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
>
> -void %main() {
> +define void @main() {
> ; dword copy
> - call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr
> ([1000 x int]* %A, long 0, long 0) to sbyte*),
> - sbyte* cast (int* getelementptr ([1000
> x int]* %B, long 0, long 0) to sbyte*),
> - uint 4000, uint 4)
> + call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
> + i8* bitcast ([32 x i32]* @B to i8*),
> + i32 128, i32 4 )
>
> ; word copy
> - call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr
> ([1000 x int]* %A, long 0, long 0) to sbyte*),
> - sbyte* cast (int* getelementptr ([1000
> x int]* %B, long 0, long 0) to sbyte*),
> - uint 4000, uint 2)
> + call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
> + i8* bitcast ([32 x i32]* @B to i8*),
> + i32 128, i32 2 )
>
> ; byte copy
> - call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr
> ([1000 x int]* %A, long 0, long 0) to sbyte*),
> - sbyte* cast (int* getelementptr ([1000
> x int]* %B, long 0, long 0) to sbyte*),
> - uint 4000, uint 1)
> + call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
> + i8* bitcast ([32 x i32]* @B to i8*),
> + i32 128, i32 1 )
> +
> ret void
> }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list