[LLVMbugs] [Bug 6396] New: de-optimization of down-counting loops with i32 counter
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Mon Feb 22 14:03:17 PST 2010
http://llvm.org/bugs/show_bug.cgi?id=6396
Summary: de-optimization of down-counting loops with i32 counter
Product: libraries
Version: 2.6
Platform: PC
OS/Version: Linux
Status: NEW
Severity: normal
Priority: P5
Component: Loop Optimizer
AssignedTo: unassignedbugs at nondot.org
ReportedBy: llvm at henning-thielemann.de
CC: llvmbugs at cs.uiuc.edu
I have written a simple loop using an i32 counter that counts from the number
of repetitions down to zero. I hoped that x86 codegen could make use of the
LOOP instruction. It does not, but this is not the main problem here.
define i32 @_fun1(i32, float*) {
_L1:
br label %_L2
_L2:
%2 = phi i32 [ %0, %_L1 ], [ %7, %_L3 ]
%3 = phi float* [ %1, %_L1 ], [ %8, %_L3 ]
%4 = phi float [ 1.000000e+00, %_L1 ], [ %6, %_L3 ]
%5 = icmp ne i32 %2, 0
br i1 %5, label %_L3, label %_L5
_L3:
%6 = fmul float %4, 0x3FEFFFE2E0000000
store float %4, float* %3
%7 = sub i32 %2, 1
%8 = getelementptr float* %3, i32 1
br label %_L2
_L5:
%9 = sub i32 %0, %2
ret i32 %9
}
This gets compiled to
.LBB1_0: # %_L1
movl 4(%esp), %ecx
incl %ecx
movss .LCPI1_0, %xmm0
xorl %eax, %eax
movl 8(%esp), %edx
jmp .LBB1_2
.align 16
.LBB1_1: # %_L3
# Loop Depth 1
# Loop Header is BB1_2
# Inner Loop
movss %xmm0, (%edx,%eax,4)
incl %eax
mulss .LCPI1_1, %xmm0
.LBB1_2: # %_L2
# Loop Depth 1
# Loop Header
# Inner Loop
decl %ecx
jne .LBB1_1
I still wonder why it does not simply increment %edx, as I told it to in the
LLVM code. But the main problem follows:
Optimization introduces a new counter that is 64 bits wide and counts upwards:
define i32 @_fun1(i32, float* nocapture) nounwind {
_L1:
%2 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %2, label %_L5, label %bb.nph
bb.nph: ; preds = %_L1
%tmp = zext i32 %0 to i64 ; <i64> [#uses=1]
br label %_L3
_L3: ; preds = %_L3, %bb.nph
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %_L3 ] ; <i64> [#uses=2]
%3 = phi float [ 1.000000e+00, %bb.nph ], [ %4, %_L3 ] ; <float> [#uses=2]
%scevgep = getelementptr float* %1, i64 %indvar ; <float*> [#uses=1]
%4 = fmul float %3, 0x3FEFFFE2E0000000 ; <float> [#uses=1]
store float %3, float* %scevgep
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next, %tmp ; <i1> [#uses=1]
br i1 %exitcond, label %_L5, label %_L3
_L5: ; preds = %_L3, %_L1
%.lcssa = phi i32 [ %0, %_L1 ], [ 0, %_L3 ] ; <i32> [#uses=1]
%5 = sub i32 %0, %.lcssa ; <i32> [#uses=1]
ret i32 %5
}
The assembly code no longer looks as nice as before:
.LBB1_0: # %_L1
pushl %edi
pushl %esi
movl 12(%esp), %eax
testl %eax, %eax
je .LBB1_5
.LBB1_1: # %bb.nph
movl 16(%esp), %ecx
xorl %edx, %edx
movss .LCPI1_0, %xmm0
movl %eax, %esi
.align 16
.LBB1_2: # %_L3
# Loop Depth 1
# Loop Header
# Inner Loop
movss %xmm0, (%ecx)
addl $4294967295, %esi
adcl $4294967295, %edx
movl %esi, %edi
orl %edx, %edi
addl $4, %ecx
testl %edi, %edi
mulss .LCPI1_1, %xmm0
jne .LBB1_2
.LBB1_3: # %_L3._L5_crit_edge
xorl %ecx, %ecx
.LBB1_4: # %_L5
subl %ecx, %eax
popl %esi
popl %edi
ret
.LBB1_5: # %_L1._L5_crit_edge
movl %eax, %ecx
jmp .LBB1_4
The 64-bit computation adds extra instructions but is unnecessary, since
the original LLVM code is satisfied with a 32-bit counter.
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs mailing list