[llvm-bugs] [Bug 27019] New: Loop unrolling generates absurd code (-O2)

via llvm-bugs llvm-bugs at lists.llvm.org
Mon Mar 21 14:12:13 PDT 2016


https://llvm.org/bugs/show_bug.cgi?id=27019

            Bug ID: 27019
           Summary: Loop unrolling generates absurd code (-O2)
           Product: clang
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: C++
          Assignee: unassignedclangbugs at nondot.org
          Reporter: kobalicek.petr at gmail.com
                CC: dgregor at apple.com, llvm-bugs at lists.llvm.org
    Classification: Unclassified

C++ Function
------------

#include <cstdlib>
#include <cmath>

// Element-wise sine: writes sin(y[i]) to x[i] for every i in [0, n).
// Note the argument order: x is the destination, y is the source.
// n == 0 performs no loads, stores, or calls.
void vecsin(double* x, double* y, size_t n) {
  for (size_t i = 0; i < n; i++) {
    x[i] = std::sin(y[i]);  // one std::sin call per element
  }
}

Compiled by GCC 6.0
-------------------

# vecsin as emitted by GCC 6.0: one scalar loop, one call to sin per element.
# Incoming registers: rdi = x (destination), rsi = y (source), rdx = n.
vecsin(double*, double*, unsigned long):
        test    rdx, rdx                # n == 0?
        je      .L9                     # yes: return without saving any registers
        push    r12
        lea     r12, [rsi+rdx*8]        # r12 = y + n, the end of the source array
        push    rbp
        mov     rbp, rdi                # rbp = store cursor into x
        push    rbx
        mov     rbx, rsi                # rbx = load cursor into y
.L4:
        movsd   xmm0, QWORD PTR [rbx]   # xmm0 = current y element
        add     rbx, 8                  # both cursors are advanced before the call ...
        add     rbp, 8
        call    sin
        movsd   QWORD PTR [rbp-8], xmm0 # ... so the store goes to the slot rbp pointed at before the add
        cmp     rbx, r12
        jne     .L4                     # repeat until the load cursor reaches y + n
        pop     rbx
        pop     rbp
        pop     r12
.L9:
        rep ret

Compiled by Clang 3.6
---------------------

# vecsin as emitted by Clang 3.6: one scalar loop driven by a down-counter.
# Register roles after the prologue: rbx = x cursor, r15 = y cursor, r14 = elements remaining.
# (The '@vecsin(...)' line after the label appears to be the tail of the label's trailing
# '#' comment, wrapped onto its own line by the mail archive.)
vecsin(double*, double*, unsigned long):                         #
@vecsin(double*, double*, unsigned long)
        push    r15
        push    r14
        push    rbx
        mov     r14, rdx                # r14 = n
        mov     r15, rsi                # r15 = y
        mov     rbx, rdi                # rbx = x
        test    r14, r14
        je      .LBB0_2                 # n == 0: skip the loop (registers were already saved)
.LBB0_1:                                # %.lr.ph
        movsd   xmm0, qword ptr [r15]
        call    sin
        movsd   qword ptr [rbx], xmm0
        add     r15, 8                  # advance both cursors by one double
        add     rbx, 8
        dec     r14                     # one fewer element remaining
        jne     .LBB0_1
.LBB0_2:                                # %._crit_edge
        pop     rbx
        pop     r14
        pop     r15
        ret

Compiled by Clang 3.8
---------------------

# vecsin as emitted by Clang 3.8. The single source loop has become four loops:
#   .LBB0_13  two elements per iteration (leftover iterations ahead of .LBB0_16)
#   .LBB0_16  the same two-element step repeated four times (8 elements per iteration)
#   .LBB0_4   one element per iteration (leftover iterations ahead of .LBB0_7)
#   .LBB0_7   the one-element step repeated four times (4 elements per iteration)
# Every two-element step still makes two separate calls to sin, one per lane.
# Register roles after the prologue: r13 = x, r12 = y, r15 = n, rbp = element index i.
# Stack slots: [rsp] = n & -2, [rsp + 8] = x, [rsp + 16] = loaded pair,
#              [rsp + 32] = result of the first sin call of a pair.
# (The '@vecsin(...)' line after the label appears to be the tail of the label's trailing
# '#' comment, wrapped onto its own line by the mail archive.)
vecsin(double*, double*, unsigned long):                         #
@vecsin(double*, double*, unsigned long)
        push    rbp
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx
        sub     rsp, 56                 # room for the spill slots listed above
        mov     r15, rdx                # r15 = n
        mov     r12, rsi                # r12 = y
        mov     r13, rdi                # r13 = x
        test    r15, r15
        je      .LBB0_18                # n == 0: straight to the epilogue
        xor     ebp, ebp                # i = 0
        cmp     r15, 1
        jbe     .LBB0_2                 # n <= 1: use the one-element loops
        xor     ebp, ebp
        mov     rcx, r15
        and     rcx, -2                 # rcx = n rounded down to a multiple of 2
        je      .LBB0_2
        lea     rax, [r12 + 8*r15 - 8]  # rax = address of y[n-1]
        xor     ebp, ebp
        cmp     rax, r13
        jb      .LBB0_11                # y[n-1] lies below x: take the two-element path
        lea     rax, [r13 + 8*r15 - 8]  # rax = address of x[n-1]
        cmp     rax, r12
        jae     .LBB0_2                 # x[n-1] is not below y either: use the one-element loops
.LBB0_11:                               # %vector.body.preheader
        mov     qword ptr [rsp], rcx    # 8-byte Spill
        lea     r14, [r15 - 2]          # r14 = n - 2
        mov     eax, r14d
        shr     eax
        inc     eax                     # eax = ((n - 2) >> 1) + 1, computed in 32 bits
        xor     ebp, ebp
        test    al, 3
        je      .LBB0_14                # low two bits clear: skip .LBB0_13
        lea     ebx, [r15 - 2]
        shr     ebx
        inc     ebx
        and     ebx, 3
        neg     rbx                     # rbx = -(that count & 3); .LBB0_13 counts it up to zero
        xor     ebp, ebp
.LBB0_13:                               # %vector.body.prol
        movups  xmm0, xmmword ptr [r12 + 8*rbp]
        movaps  xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
        call    sin
        movaps  xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
        movapd  xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
        shufpd  xmm0, xmm0, 1           # xmm0 = xmm0[1,0]
        call    sin
        movapd  xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
        unpcklpd        xmm1, xmm0      # xmm1 = xmm1[0],xmm0[0]
        movupd  xmmword ptr [r13 + 8*rbp], xmm1
        add     rbp, 2                  # i += 2
        inc     rbx
        jne     .LBB0_13
.LBB0_14:                               # %vector.body.preheader.split
        mov     qword ptr [rsp + 8], r13 # 8-byte Spill
        mov     rbx, r12                # keep y in rbx; r12 is reused as a cursor below
        cmp     r14, 6
        jb      .LBB0_17                # n - 2 < 6: skip the unrolled loop
        mov     r13, r15
        and     r13, -2
        sub     r13, rbp                # r13 = (n & -2) - i, the down-counter for .LBB0_16
        mov     rax, qword ptr [rsp + 8] # 8-byte Reload
        lea     r14, [rax + 8*rbp + 48] # r14 = x cursor, biased by 48 bytes
        lea     r12, [rbx + 8*rbp + 48] # r12 = y cursor, biased by 48 bytes
.LBB0_16:                               # %vector.body
        # Four copies of the two-element step, at byte offsets -48, -32, -16 and 0.
        movups  xmm0, xmmword ptr [r12 - 48]
        movaps  xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
        call    sin
        movaps  xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
        movapd  xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
        shufpd  xmm0, xmm0, 1           # xmm0 = xmm0[1,0]
        call    sin
        movapd  xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
        unpcklpd        xmm1, xmm0      # xmm1 = xmm1[0],xmm0[0]
        movupd  xmmword ptr [r14 - 48], xmm1
        movups  xmm0, xmmword ptr [r12 - 32]
        movaps  xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
        call    sin
        movaps  xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
        movapd  xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
        shufpd  xmm0, xmm0, 1           # xmm0 = xmm0[1,0]
        call    sin
        movapd  xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
        unpcklpd        xmm1, xmm0      # xmm1 = xmm1[0],xmm0[0]
        movupd  xmmword ptr [r14 - 32], xmm1
        movups  xmm0, xmmword ptr [r12 - 16]
        movaps  xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
        call    sin
        movaps  xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
        movapd  xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
        shufpd  xmm0, xmm0, 1           # xmm0 = xmm0[1,0]
        call    sin
        movapd  xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
        unpcklpd        xmm1, xmm0      # xmm1 = xmm1[0],xmm0[0]
        movupd  xmmword ptr [r14 - 16], xmm1
        movups  xmm0, xmmword ptr [r12]
        movaps  xmmword ptr [rsp + 16], xmm0 # 16-byte Spill
        call    sin
        movaps  xmmword ptr [rsp + 32], xmm0 # 16-byte Spill
        movapd  xmm0, xmmword ptr [rsp + 16] # 16-byte Reload
        shufpd  xmm0, xmm0, 1           # xmm0 = xmm0[1,0]
        call    sin
        movapd  xmm1, xmmword ptr [rsp + 32] # 16-byte Reload
        unpcklpd        xmm1, xmm0      # xmm1 = xmm1[0],xmm0[0]
        movupd  xmmword ptr [r14], xmm1
        add     r14, 64                 # both cursors move 64 bytes = 8 doubles
        add     r12, 64
        add     r13, -8                 # 8 fewer elements remaining
        jne     .LBB0_16
.LBB0_17:                               # %middle.block
        mov     rax, qword ptr [rsp]    # 8-byte Reload
        cmp     rax, r15                # compare n & -2 with n; the je below consumes these flags
        mov     rbp, rax                # i = n & -2
        mov     r12, rbx                # r12 = y again
        mov     r13, qword ptr [rsp + 8] # 8-byte Reload
        je      .LBB0_18                # n & -2 == n: no element left over
.LBB0_2:                                # %.lr.ph.preheader9
        mov     eax, r15d
        sub     eax, ebp                # eax = n - i, computed in 32 bits
        lea     r14, [r15 - 1]
        sub     r14, rbp                # r14 = n - 1 - i
        test    al, 3
        je      .LBB0_5                 # low two bits of n - i clear: skip .LBB0_4
        mov     ebx, r15d
        sub     ebx, ebp
        and     ebx, 3
        neg     rbx                     # rbx = -((n - i) & 3); .LBB0_4 counts it up to zero
.LBB0_4:                                # %.lr.ph.prol
        movsd   xmm0, qword ptr [r12 + 8*rbp] # xmm0 = mem[0],zero
        call    sin
        movsd   qword ptr [r13 + 8*rbp], xmm0
        inc     rbp                     # i += 1
        inc     rbx
        jne     .LBB0_4
.LBB0_5:                                # %.lr.ph.preheader9.split
        cmp     r14, 3
        jb      .LBB0_18                # r14 (n - 1 - i as of .LBB0_2) < 3: skip the unrolled loop
        sub     r15, rbp                # r15 = n - i, the down-counter for .LBB0_7
        lea     rbx, [r13 + 8*rbp + 24] # rbx = x cursor, biased by 24 bytes
        lea     rbp, [r12 + 8*rbp + 24] # rbp = y cursor, biased by 24 bytes (index no longer needed)
.LBB0_7:                                # %.lr.ph
        # Four copies of the one-element step, at byte offsets -24, -16, -8 and 0.
        movsd   xmm0, qword ptr [rbp - 24] # xmm0 = mem[0],zero
        call    sin
        movsd   qword ptr [rbx - 24], xmm0
        movsd   xmm0, qword ptr [rbp - 16] # xmm0 = mem[0],zero
        call    sin
        movsd   qword ptr [rbx - 16], xmm0
        movsd   xmm0, qword ptr [rbp - 8] # xmm0 = mem[0],zero
        call    sin
        movsd   qword ptr [rbx - 8], xmm0
        movsd   xmm0, qword ptr [rbp]   # xmm0 = mem[0],zero
        call    sin
        movsd   qword ptr [rbx], xmm0
        add     rbx, 32                 # both cursors move 32 bytes = 4 doubles
        add     rbp, 32
        add     r15, -4                 # 4 fewer elements remaining
        jne     .LBB0_7
.LBB0_18:                               # %._crit_edge
        add     rsp, 56
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        pop     rbp
        ret

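I think unrolling calls to sin() is absurd in this case: the Clang 3.8 output still makes one call to sin() per element, so the code is much larger without making any fewer calls.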

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160321/99f0d9ff/attachment-0001.html>


More information about the llvm-bugs mailing list