[llvm-bugs] [Bug 27019] New: Loop unrolling generates absurd code (-O2)
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Mar 21 14:12:13 PDT 2016
https://llvm.org/bugs/show_bug.cgi?id=27019
Bug ID: 27019
Summary: Loop unrolling generates absurd code (-O2)
Product: clang
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: normal
Priority: P
Component: C++
Assignee: unassignedclangbugs at nondot.org
Reporter: kobalicek.petr at gmail.com
CC: dgregor at apple.com, llvm-bugs at lists.llvm.org
Classification: Unclassified
C++ Function
------------
#include <cstdlib>
#include <cmath>
// Reproducer for the report: writes sin(y[i]) into x[i] for every i in [0, n).
// x is the destination array, y is the source array, n is the element count.
void vecsin(double* x, double* y, size_t n) {
for (size_t i = 0; i < n; i++) {
// One std::sin call per element; this is the `call sin` seen in the listings.
x[i] = std::sin(y[i]);
}
}
Compiled by GCC 6.0
-------------------
# GCC 6.0 output for vecsin(x, y, n): one scalar loop, one `call sin` per element.
# Register roles as used below: rdi = x (destination), rsi = y (source), rdx = n.
vecsin(double*, double*, unsigned long):
# n == 0: return immediately, before anything is pushed.
test rdx, rdx
je .L9
push r12
# r12 = y + n, i.e. one past the last source element; it is the loop-end sentinel.
lea r12, [rsi+rdx*8]
push rbp
# rbp = write cursor into x.
mov rbp, rdi
push rbx
# rbx = read cursor into y.
mov rbx, rsi
.L4:
movsd xmm0, QWORD PTR [rbx]
# Both cursors are advanced before the call, so the store after the call
# addresses the current element as [rbp-8].
add rbx, 8
add rbp, 8
call sin
movsd QWORD PTR [rbp-8], xmm0
# Keep looping until the read cursor reaches y + n.
cmp rbx, r12
jne .L4
# Restore the three saved registers in reverse push order.
pop rbx
pop rbp
pop r12
.L9:
rep ret
Compiled by Clang 3.6
---------------------
# Clang 3.6 output for vecsin(x, y, n): one scalar loop, one `call sin` per
# element, with the element count decremented to zero.
# NOTE(review): the '@vecsin(...)' line right after the label looks like the tail
# of the label's '#' comment, split onto its own line by the mail archive.
vecsin(double*, double*, unsigned long): #
@vecsin(double*, double*, unsigned long)
push r15
push r14
push rbx
# r14 = n (remaining count), r15 = read cursor into y, rbx = write cursor into x.
mov r14, rdx
mov r15, rsi
mov rbx, rdi
# n == 0: skip the loop and go straight to the epilogue.
test r14, r14
je .LBB0_2
.LBB0_1: # %.lr.ph
movsd xmm0, qword ptr [r15]
call sin
movsd qword ptr [rbx], xmm0
# Advance both cursors by one double and count the element off.
add r15, 8
add rbx, 8
dec r14
jne .LBB0_1
.LBB0_2: # %._crit_edge
# Restore the three saved registers in reverse push order.
pop rbx
pop r14
pop r15
ret
Compiled by Clang 3.8
---------------------
# Clang 3.8 output for vecsin(x, y, n). Going by Clang's own block labels, the loop
# is split into a two-element "vector" path (%vector.body*) and a scalar path
# (%.lr.ph*). Each path has a prologue loop followed by a 4x-unrolled main loop.
# Every element is still computed by its own `call sin`.
# Register roles after the prologue: r15 = n, r12 = y (source), r13 = x
# (destination), rbp = element index i.
# Stack slots used below: [rsp] = n & -2, [rsp+8] = saved x pointer,
# [rsp+16] = the pair of inputs just loaded, [rsp+32] = result of the first sin
# call of a pair.
vecsin(double*, double*, unsigned long): #
@vecsin(double*, double*, unsigned long)
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 56
mov r15, rdx
mov r12, rsi
mov r13, rdi
# n == 0: nothing to do.
test r15, r15
je .LBB0_18
# i = 0. (The same zeroing is repeated several times below.)
xor ebp, ebp
# n <= 1: a pair cannot be formed, use the scalar path.
cmp r15, 1
jbe .LBB0_2
xor ebp, ebp
# rcx = n & -2: n rounded down to an even number of elements.
mov rcx, r15
and rcx, -2
je .LBB0_2
# Runtime overlap check between the two arrays.
# rax = address of the last source element, y[n-1]; if it lies below x, take
# the vector path.
lea rax, [r12 + 8*r15 - 8]
xor ebp, ebp
cmp rax, r13
jb .LBB0_11
# rax = address of the last destination element, x[n-1]; if it is not below y,
# fall back to the scalar path.
lea rax, [r13 + 8*r15 - 8]
cmp rax, r12
jae .LBB0_2
.LBB0_11: # %vector.body.preheader
mov qword ptr [rsp], rcx # 8-byte Spill
# r14 = n - 2. eax = (low 32 bits of r14 >> 1) + 1, the count of two-element
# iterations; only its low two bits are tested here.
lea r14, [r15 - 2]
mov eax, r14d
shr eax
inc eax
xor ebp, ebp
# If that count is a multiple of 4, skip the prologue loop.
test al, 3
je .LBB0_14
# rbx = -(count & 3): a negative counter that the prologue loop increments to zero.
lea ebx, [r15 - 2]
shr ebx
inc ebx
and ebx, 3
neg rbx
xor ebp, ebp
.LBB0_13: # %vector.body.prol
# Load y[i] and y[i+1] as one 16-byte vector, then process the lanes one at a
# time: call sin on the low lane, swap the lanes, call sin again, and merge the
# two low-lane results into xmm1 before storing them to x[i], x[i+1].
movups xmm0, xmmword ptr [r12 + 8*rbp]
movaps xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
call sin
movaps xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
movapd xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
shufpd xmm0, xmm0, 1 # xmm0 = xmm0[1,0]
call sin
movapd xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
unpcklpd xmm1, xmm0 # xmm1 = xmm1[0],xmm0[0]
movupd xmmword ptr [r13 + 8*rbp], xmm1
add rbp, 2
inc rbx
jne .LBB0_13
.LBB0_14: # %vector.body.preheader.split
# x is spilled and y is parked in rbx because r12, r13 and r14 are reused as
# cursors and a counter by the unrolled loop below.
mov qword ptr [rsp + 8], r13 # 8-byte Spill
mov rbx, r12
# n - 2 < 6: skip the unrolled loop.
cmp r14, 6
jb .LBB0_17
# r13 = (n & -2) - i: elements left for the unrolled loop.
mov r13, r15
and r13, -2
sub r13, rbp
mov rax, qword ptr [rsp + 8] # 8-byte Reload
# r14 = x cursor and r12 = y cursor, both biased by +48 bytes so the four pairs
# of one pass sit at offsets -48, -32, -16 and 0.
lea r14, [rax + 8*rbp + 48]
lea r12, [rbx + 8*rbp + 48]
.LBB0_16: # %vector.body
# Four copies of the pair sequence from the prologue loop: 8 elements and 8 sin
# calls per pass.
movups xmm0, xmmword ptr [r12 - 48]
movaps xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
call sin
movaps xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
movapd xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
shufpd xmm0, xmm0, 1 # xmm0 = xmm0[1,0]
call sin
movapd xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
unpcklpd xmm1, xmm0 # xmm1 = xmm1[0],xmm0[0]
movupd xmmword ptr [r14 - 48], xmm1
movups xmm0, xmmword ptr [r12 - 32]
movaps xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
call sin
movaps xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
movapd xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
shufpd xmm0, xmm0, 1 # xmm0 = xmm0[1,0]
call sin
movapd xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
unpcklpd xmm1, xmm0 # xmm1 = xmm1[0],xmm0[0]
movupd xmmword ptr [r14 - 32], xmm1
movups xmm0, xmmword ptr [r12 - 16]
movaps xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
call sin
movaps xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
movapd xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
shufpd xmm0, xmm0, 1 # xmm0 = xmm0[1,0]
call sin
movapd xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
unpcklpd xmm1, xmm0 # xmm1 = xmm1[0],xmm0[0]
movupd xmmword ptr [r14 - 16], xmm1
movups xmm0, xmmword ptr [r12]
movaps xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
call sin
movaps xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
movapd xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
shufpd xmm0, xmm0, 1 # xmm0 = xmm0[1,0]
call sin
movapd xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
unpcklpd xmm1, xmm0 # xmm1 = xmm1[0],xmm0[0]
movupd xmmword ptr [r14], xmm1
# Advance both cursors by 64 bytes (8 doubles) and count those 8 elements off.
add r14, 64
add r12, 64
add r13, -8
jne .LBB0_16
.LBB0_17: # %middle.block
# Restore the scalar-path register roles: i = n & -2, r12 = y, r13 = x.
# The je below consumes the flags of the cmp; the three movs in between do not
# modify flags. If n & -2 == n there is no leftover element.
mov rax, qword ptr [rsp] # 8-byte Reload
cmp rax, r15
mov rbp, rax
mov r12, rbx
mov r13, qword ptr [rsp + 8] # 8-byte Reload
je .LBB0_18
.LBB0_2: # %.lr.ph.preheader9
# Scalar path, entered with rbp = i (the first element not yet processed).
# eax = low 32 bits of (n - i); r14 = n - 1 - i.
mov eax, r15d
sub eax, ebp
lea r14, [r15 - 1]
sub r14, rbp
# If the remaining count is a multiple of 4, skip the scalar prologue loop.
test al, 3
je .LBB0_5
# rbx = -((n - i) & 3): a negative counter incremented to zero by the loop below.
mov ebx, r15d
sub ebx, ebp
and ebx, 3
neg rbx
.LBB0_4: # %.lr.ph.prol
movsd xmm0, qword ptr [r12 + 8*rbp] # xmm0 = mem[0],zero
call sin
movsd qword ptr [r13 + 8*rbp], xmm0
inc rbp
inc rbx
jne .LBB0_4
.LBB0_5: # %.lr.ph.preheader9.split
# r14 still holds n - 1 - i from before the prologue loop; below 3 means fewer
# than 4 elements were left on entry, so the prologue loop handled all of them.
cmp r14, 3
jb .LBB0_18
# r15 = n - i: elements left. rbx = x cursor and rbp = y cursor, both biased by
# +24 bytes so the four elements of one pass sit at offsets -24, -16, -8 and 0.
sub r15, rbp
lea rbx, [r13 + 8*rbp + 24]
lea rbp, [r12 + 8*rbp + 24]
.LBB0_7: # %.lr.ph
# Scalar loop unrolled 4x: four load / call sin / store triples per pass.
movsd xmm0, qword ptr [rbp - 24] # xmm0 = mem[0],zero
call sin
movsd qword ptr [rbx - 24], xmm0
movsd xmm0, qword ptr [rbp - 16] # xmm0 = mem[0],zero
call sin
movsd qword ptr [rbx - 16], xmm0
movsd xmm0, qword ptr [rbp - 8] # xmm0 = mem[0],zero
call sin
movsd qword ptr [rbx - 8], xmm0
movsd xmm0, qword ptr [rbp] # xmm0 = mem[0],zero
call sin
movsd qword ptr [rbx], xmm0
# Advance both cursors by 32 bytes (4 doubles) and count those 4 elements off.
add rbx, 32
add rbp, 32
add r15, -4
jne .LBB0_7
.LBB0_18: # %._crit_edge
# Common exit: release the 56-byte frame and restore the six saved registers.
add rsp, 56
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
I think unrolling calls to sin() is absurd in this case.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160321/99f0d9ff/attachment-0001.html>
More information about the llvm-bugs mailing list