[LLVMbugs] [Bug 5968] New: Some useless load/stores in a kernel
bugzilla-daemon at cs.uiuc.edu
bugzilla-daemon at cs.uiuc.edu
Thu Jan 7 00:54:46 PST 2010
http://llvm.org/bugs/show_bug.cgi?id=5968
Summary: Some useless load/stores in a kernel
Product: new-bugs
Version: 2.6
Platform: PC
OS/Version: Windows XP
Status: NEW
Keywords: code-quality
Severity: normal
Priority: P2
Component: new bugs
AssignedTo: unassignedbugs at nondot.org
ReportedBy: bearophile at mailas.com
CC: llvmbugs at cs.uiuc.edu
The attachment contains a small C program whose compiled output shows a number
of redundant loads/stores from memory.
On Windows I compile it with (or the same with gcc):
llvm-gcc -Wall -O3 -s -S -fomit-frame-pointer -Wl,--enable-stdcall-fixup -msse3
-march=native lsolver.c -o lsolver_llvm.s
Here -Wl,--enable-stdcall-fixup works around an LLVM bug.
This is the good asm generated by gcc 4.3.3:
L26:
fldl -8(%edx,%eax,8)
fldl -8(%ebx,%eax,8)
faddl -8(%ecx,%eax,8)
fmul %st(4), %st
fldl -16(%edx,%eax,8)
faddl (%edx,%eax,8)
fmul %st(6), %st
faddp %st, %st(1)
fmul %st(3), %st
fstl -8(%edx,%eax,8)
incl %eax
fsubp %st, %st(1)
cmpl %edi, %eax
fmul %st(0), %st
faddp %st, %st(1)
jne L26
Again with gcc, but with -mfpmath=sse to force the use of SSE; this version uses
5 loads and 1 store:
L26:
movsd -8(%edx,%eax,8), %xmm0
movsd -8(%ebx,%eax,8), %xmm1
movsd -16(%edx,%eax,8), %xmm2
addsd -8(%ecx,%eax,8), %xmm1
addsd (%edx,%eax,8), %xmm2
mulsd %xmm5, %xmm1
mulsd %xmm6, %xmm2
addsd %xmm2, %xmm1
mulsd %xmm4, %xmm1
movsd %xmm1, -8(%edx,%eax,8)
subsd %xmm0, %xmm1
incl %eax
mulsd %xmm1, %xmm1
cmpl %edi, %eax
addsd %xmm1, %xmm3
jne L26
Compiled with LLVM-gcc 2.6 (32 bit), uses 11 loads and 1 store (very similar
asm is produced by the D LDC compiler too):
LBB5_4:
movl 8(%eax,%ecx,4), %ebx
movl (%eax,%ecx,4), %ebp
movsd 8(%ebp,%esi,8), %xmm4
addsd 8(%ebx,%esi,8), %xmm4
mulsd %xmm1, %xmm4
movsd (%edx,%esi,8), %xmm5
addsd 16(%edx,%esi,8), %xmm5
mulsd %xmm2, %xmm5
addsd %xmm4, %xmm5
mulsd %xmm3, %xmm5
movsd 8(%edi,%esi,8), %xmm4 ; ***
movsd %xmm5, 8(%edi,%esi,8) ; ***
movl 4(%eax,%ecx,4), %edx
movsd 8(%edx,%esi,8), %xmm5 ; ***
subsd %xmm4, %xmm5
mulsd %xmm5, %xmm5
addsd %xmm5, %xmm0
incl %esi
cmpl 4(%esp), %esi
movl %edx, %edi
jne LBB5_4
Compiled with llvm-gcc (v2.7 trunk) on x86-64 Linux (asm provided by <baldrick>
on IRC), the situation is a little better: 20 instructions instead of 21, with
8 loads + 1 store:
.LBB5_4:
movq (%rcx,%rdi,8), %r11
movq 16(%rcx,%rdi,8), %r10
addsd 16(%r8,%r9,8), %xmm4
movsd 8(%r8,%r9,8), %xmm6
mulsd %xmm1, %xmm4
movsd 8(%r11,%r9,8), %xmm5
addsd 8(%r10,%r9,8), %xmm5
mulsd %xmm2, %xmm5
addsd %xmm4, %xmm5
mulsd %xmm3, %xmm5
movsd %xmm5, 8(%r8,%r9,8)
movq 8(%rcx,%rdi,8), %r8
movsd 8(%r8,%r9,8), %xmm4
incq %r9
cmpq %rsi, %r9
movapd %xmm4, %xmm5
subsd %xmm6, %xmm5
mulsd %xmm5, %xmm5
addsd %xmm5, %xmm0
jne .LBB5_4
Those redundant loads and stores (in the 32 bit version) can also be seen by
comparing the running time of the llvm-gcc version with that of the gcc version.
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs
mailing list