[LLVMbugs] [Bug 12392] New: Poor performance due to register spilling when no spilling is necessary
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Wed Mar 28 22:20:10 PDT 2012
http://llvm.org/bugs/show_bug.cgi?id=12392
Bug #: 12392
Summary: Poor performance due to register spilling when no
spilling is necessary
Product: new-bugs
Version: 3.0
Platform: PC
OS/Version: Windows XP
Status: NEW
Severity: normal
Priority: P
Component: new bugs
AssignedTo: unassignedbugs at nondot.org
ReportedBy: brenthwalker at gmail.com
CC: llvmbugs at cs.uiuc.edu
Classification: Unclassified
I have run into the following strange llvm behavior. For the C program below,
function sum() gets inlined in foo() but the code generated looks very
suboptimal (the code is an extract from a larger program).
Below I show the 32-bit x86 assembly as produced by the demo page on
the llvm home page ("Output A"). As you can see from the assembly,
after sum() is inlined and the loop unrolled, the generated code
loads all values of array v (aka &x[i]) into registers before adding
any numbers up -- in the process it runs out of registers and starts
spilling (in essense copying the doubles from one area of memory to
another). After that, it proceeds to add the numbers up.
But why not add the numbers into 1 register directly? Clearly this is
what the C code is doing -- nothing could have been more explicit.
The really strange thing, is that in the assingment to p[i] is removed
(line marked with "xxx..."), then the code produced is optimal and
exactly what one expects. I show this result in "Output B" where you
get a beatiful sequence of addsd into register xmm2.
It's all very strange and it points to some questionable decision
making on the part of llvm. I tried different versions of the sum()
function (elliminating the loop for example) but it does not help.
Another observation is that the loop variable i (in foo) must be
involved: if one does *p = 5 (instead of p[i] = 5), the problem also
goes away.
Until a fix comes, if you have a suggestion on how to get around this problem
please let me know.
Here is the code:
double sum( double* v, int v_siz )
{
double sum = 0.0;
int i = 0;
for (; i != v_siz; ++i)
sum += v[i];
return sum;
}
double foo(double *x, int *p, int k)
{
double s = 0.0;
for (int i = 0; i != k;++i)
{
s += sum(&x[i], 18);
p[i] = 5; // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
}
return s;
}
====== Output A ======
======================
foo: # @foo
.Ltmp12:
.cfi_startproc
# BB#0:
pushl %ebx
.Ltmp13:
.cfi_def_cfa_offset 8
pushl %edi
.Ltmp14:
.cfi_def_cfa_offset 12
pushl %esi
.Ltmp15:
.cfi_def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp) # 8-byte Spill
movl %ebx, %edi
addl $1, %edi
addsd (%edx,%edi,8), %xmm2
movsd 136(%edx,%ebx,8), %xmm1
movsd %xmm1, 72(%esp) # 8-byte Spill
movsd 128(%edx,%ebx,8), %xmm1
movsd %xmm1, 64(%esp) # 8-byte Spill
movsd 120(%edx,%ebx,8), %xmm1
movsd %xmm1, 56(%esp) # 8-byte Spill
movsd 112(%edx,%ebx,8), %xmm1
movsd %xmm1, 48(%esp) # 8-byte Spill
movsd 104(%edx,%ebx,8), %xmm1
movsd %xmm1, 40(%esp) # 8-byte Spill
movsd 96(%edx,%ebx,8), %xmm1
movsd %xmm1, 32(%esp) # 8-byte Spill
movsd 88(%edx,%ebx,8), %xmm1
movsd %xmm1, 24(%esp) # 8-byte Spill
movsd 80(%edx,%ebx,8), %xmm1
movsd %xmm1, 16(%esp) # 8-byte Spill
movsd 72(%edx,%ebx,8), %xmm1
movsd %xmm1, 8(%esp) # 8-byte Spill
movsd 64(%edx,%ebx,8), %xmm7
movsd 56(%edx,%ebx,8), %xmm1
movsd 48(%edx,%ebx,8), %xmm3
movsd 40(%edx,%ebx,8), %xmm4
movsd 32(%edx,%ebx,8), %xmm5
movsd 24(%edx,%ebx,8), %xmm6
movl $5, (%ecx,%ebx,4)
addsd (%esp), %xmm2 # 8-byte Folded Reload
addsd %xmm6, %xmm2
addsd %xmm5, %xmm2
addsd %xmm4, %xmm2
addsd %xmm3, %xmm2
addsd %xmm1, %xmm2
addsd %xmm7, %xmm2
addsd 8(%esp), %xmm2 # 8-byte Folded Reload
addsd 16(%esp), %xmm2 # 8-byte Folded Reload
addsd 24(%esp), %xmm2 # 8-byte Folded Reload
addsd 32(%esp), %xmm2 # 8-byte Folded Reload
addsd 40(%esp), %xmm2 # 8-byte Folded Reload
addsd 48(%esp), %xmm2 # 8-byte Folded Reload
addsd 56(%esp), %xmm2 # 8-byte Folded Reload
addsd 64(%esp), %xmm2 # 8-byte Folded Reload
addsd 72(%esp), %xmm2 # 8-byte Folded Reload
addsd %xmm2, %xmm0
adcl $0, %esi
cmpl %eax, %edi
movl %edi, %ebx
jne .LBB1_2
.LBB1_3: # %._crit_edge
movsd %xmm0, 80(%esp)
fldl 80(%esp)
addl $88, %esp
popl %esi
popl %edi
popl %ebx
ret
.Ltmp20:
.size foo, .Ltmp20-foo
.Ltmp21:
.cfi_endproc
.Leh_func_end1:
====== Output B ======
======================
foo: # @foo
.Ltmp11:
.cfi_startproc
# BB#0:
pushl %edi
.Ltmp12:
.cfi_def_cfa_offset 8
pushl %esi
.Ltmp13:
.cfi_def_cfa_offset 12
subl $12, %esp
.Ltmp14:
.cfi_def_cfa_offset 24
.Ltmp15:
.cfi_offset %esi, -12
.Ltmp16:
.cfi_offset %edi, -8
pxor %xmm0, %xmm0
movl 32(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %esi, %esi
movl 24(%esp), %ecx
pxor %xmm1, %xmm1
xorl %edx, %edx
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%ecx,%esi,8), %xmm2
addsd %xmm1, %xmm2
movl %esi, %edi
addl $1, %edi
addsd (%ecx,%edi,8), %xmm2
addsd 16(%ecx,%esi,8), %xmm2
addsd 24(%ecx,%esi,8), %xmm2
addsd 32(%ecx,%esi,8), %xmm2
addsd 40(%ecx,%esi,8), %xmm2
addsd 48(%ecx,%esi,8), %xmm2
addsd 56(%ecx,%esi,8), %xmm2
addsd 64(%ecx,%esi,8), %xmm2
addsd 72(%ecx,%esi,8), %xmm2
addsd 80(%ecx,%esi,8), %xmm2
addsd 88(%ecx,%esi,8), %xmm2
addsd 96(%ecx,%esi,8), %xmm2
addsd 104(%ecx,%esi,8), %xmm2
addsd 112(%ecx,%esi,8), %xmm2
addsd 120(%ecx,%esi,8), %xmm2
addsd 128(%ecx,%esi,8), %xmm2
addsd 136(%ecx,%esi,8), %xmm2
addsd %xmm2, %xmm0
adcl $0, %edx
cmpl %eax, %edi
movl %edi, %esi
jne .LBB1_2
.LBB1_3: # %._crit_edge
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
popl %esi
popl %edi
ret
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs
mailing list