[llvm-bugs] [Bug 47282] New: Loop not vectorized with -march=haswell

Sat Aug 22 05:10:31 PDT 2020

https://bugs.llvm.org/show_bug.cgi?id=47282

            Bug ID: 47282
           Summary: Loop not vectorized with -march=haswell
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: llvm-bugs at lists.llvm.org

/*
 * Reproducer: element-wise select between two source arrays.
 *
 * For every index n in [0, N):
 *     n2[n] = (n1[n] > 0) ? n3[n] : n4[n]
 *
 *   n1 - condition array (only read)
 *   n2 - destination array (only written)
 *   n3 - source used where n1[n] >  0
 *   n4 - source used where n1[n] <= 0
 *   N  - number of elements to process
 *
 * All four pointers are __restrict-qualified, i.e. the caller promises
 * that the arrays do not alias one another.
 */
void test1(int*__restrict n1, int*__restrict n2,
    int*__restrict n3, int*__restrict n4, unsigned N)
{
    /* n is int but N is unsigned, so `n < N` is evaluated as an
       unsigned comparison. */
    for (int n = 0; n < N; ++n)
    {
        /* Each iteration reads only the selected source element
           (n3[n] or n4[n]), never both. */
        if (n1[n] > 0)
            n2[n] = n3[n];
        else
            n2[n] = n4[n];
    }
}


Clang (scalar inner loop, unrolled by 4; not vectorized):
# Scalar loop body, unrolled by 4: the same five-instruction step is repeated
# for byte offsets +0, +4, +8 and +12 from the current element.
# Register roles, assuming the SysV AMD64 argument order for test1 (the
# report's OS is Linux); the function prologue is not part of this excerpt:
#   rdi = n1, rsi = n2, rdx = n3, rcx = n4
#   r10 = element index n (scaled by 4 in every address)
#   r9  = loop bound, set outside this excerpt
# Each step selects the source *base pointer* with cmovg and then performs
# exactly one 4-byte load and one 4-byte store.
.LBB0_7:                                # =>This Inner Loop Header: Depth=1
        # Element n: n2[n] = (n1[n] > 0) ? n3[n] : n4[n]
        cmp     dword ptr [rdi + 4*r10], 0
        # rax defaults to rcx; mov leaves the flags from cmp untouched
        mov     rax, rcx
        # signed "greater than": switch rax to rdx when the compared value > 0
        cmovg   rax, rdx
        mov     eax, dword ptr [rax + 4*r10]
        mov     dword ptr [rsi + 4*r10], eax
        # Element n+1 (byte offset +4): same select / load / store
        cmp     dword ptr [rdi + 4*r10 + 4], 0
        mov     rax, rcx
        cmovg   rax, rdx
        mov     eax, dword ptr [rax + 4*r10 + 4]
        mov     dword ptr [rsi + 4*r10 + 4], eax
        # Element n+2 (byte offset +8)
        cmp     dword ptr [rdi + 4*r10 + 8], 0
        mov     rax, rcx
        cmovg   rax, rdx
        mov     eax, dword ptr [rax + 4*r10 + 8]
        mov     dword ptr [rsi + 4*r10 + 8], eax
        # Element n+3 (byte offset +12)
        cmp     dword ptr [rdi + 4*r10 + 12], 0
        mov     rax, rcx
        cmovg   rax, rdx
        mov     eax, dword ptr [rax + 4*r10 + 12]
        mov     dword ptr [rsi + 4*r10 + 12], eax
        # Advance the index by the unroll factor and loop until it equals r9
        add     r10, 4
        cmp     r9, r10
        jne     .LBB0_7

GCC/ICC (vectorized main loop, 8 ints per iteration):
# Vectorized loop: each iteration handles one 32-byte YMM vector, i.e. eight
# 32-bit elements.
# Register roles, assuming the SysV AMD64 argument order for test1 (the
# report's OS is Linux); the code that sets them up is not in this excerpt:
#   rdi = n1, rsi = n2, rdx = n3, rcx = n4, r8d = N
#   rax  = byte offset into the arrays, r9 = end offset (set outside)
#   ymm2 = comparison operand set outside this excerpt; presumably all
#          zeros, matching the `> 0` test in the C source
.L5:
        # ymm0 = 8 elements loaded from [rdi+rax] (unaligned load)
        vmovdqu ymm0, YMMWORD PTR [rdi+rax]
        # ymm1 = per-lane mask, all ones where ymm0 > ymm2 (signed)
        vpcmpgtd        ymm1, ymm0, ymm2
        # ymm4 = per-lane signed minimum of ymm0 and ymm2
        vpminsd ymm4, ymm0, ymm2
        # Masked load from [rdx+rax]: only lanes enabled in ymm1 are read,
        # the other lanes of ymm3 become zero
        vpmaskmovd      ymm3, ymm1, YMMWORD PTR [rdx+rax]
        # ymm0 == min(ymm0, ymm2) holds exactly where ymm0 <= ymm2, so this
        # produces the complementary mask to ymm1
        vpcmpeqd        ymm0, ymm0, ymm4
        # Masked load from [rcx+rax] for the complementary lanes
        vpmaskmovd      ymm0, ymm0, YMMWORD PTR [rcx+rax]
        # Blend: take ymm3 where the ymm1 mask is set, otherwise keep ymm0
        vpblendvb       ymm0, ymm0, ymm3, ymm1
        # Store the 8 selected elements to [rsi+rax]
        vmovdqu YMMWORD PTR [rsi+rax], ymm0
        # Advance by one vector (32 bytes) until the end offset in r9
        add     rax, 32
        cmp     rax, r9
        jne     .L5
        # After the loop: eax = r8d rounded down to a multiple of 8
        # (and with -8 clears the low three bits)
        mov     eax, r8d
        and     eax, -8
        mov     r9d, eax
        # If r8d was already a multiple of 8, jump to .L22 (not shown here)
        cmp     r8d, eax
        je      .L22
        # Otherwise fall through; vzeroupper zeroes the upper halves of the
        # YMM registers before the code that follows (not shown; presumably
        # the scalar handling of the remaining elements)
        vzeroupper

https://godbolt.org/z/h9hbfv

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200822/e790ecd8/attachment.html>