[llvm-bugs] [Bug 15077] Should not vectorize variable shifts when no instructions for it are available

via llvm-bugs llvm-bugs at lists.llvm.org
Mon Oct 24 05:55:31 PDT 2016


https://llvm.org/bugs/show_bug.cgi?id=15077

Simon Pilgrim <llvm-dev at redking.me.uk> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |FIXED

--- Comment #4 from Simon Pilgrim <llvm-dev at redking.me.uk> ---
Resolving this. With rL284939 we have accurate vector shift costs for SSE4.1+,
which means that we can correctly vectorize the inner loop as 4 * <4 x i32>:

LBB0_8:
    vpmovsxbd    -12(%ebx), %xmm5
    vmovdqu    -48(%edi), %xmm3
    vmovdqu    -32(%edi), %xmm4
    vmovdqu    -16(%edi), %xmm2
    vmovdqu    (%edi), %xmm1
    vpsrldq    $12, %xmm5, %xmm6       # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    vpsrlq    $32, %xmm5, %xmm7
    vpsrld    %xmm6, %xmm3, %xmm6
    vpsrld    %xmm7, %xmm3, %xmm7
    vpblendw    $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
    vpunpckhdq    %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
    vpmovzxdq    %xmm5, %xmm5    # xmm5 = xmm5[0],zero,xmm5[1],zero
    vpsrld    %xmm7, %xmm3, %xmm7
    vpsrld    %xmm5, %xmm3, %xmm3
    vpmovsxbd    -8(%ebx), %xmm5
    vpblendw    $240, %xmm7, %xmm3, %xmm3 # xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7]
    vpblendw    $204, %xmm6, %xmm3, %xmm3 # xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
    vpsrldq    $12, %xmm5, %xmm6       # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    vpsrlq    $32, %xmm5, %xmm7
    vpsrld    %xmm6, %xmm4, %xmm6
    vpsrld    %xmm7, %xmm4, %xmm7
    vpblendw    $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
    vpunpckhdq    %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
    vpmovzxdq    %xmm5, %xmm5    # xmm5 = xmm5[0],zero,xmm5[1],zero
    vpsrld    %xmm7, %xmm4, %xmm7
    vpsrld    %xmm5, %xmm4, %xmm4
    vpmovsxbd    -4(%ebx), %xmm5
    vpblendw    $240, %xmm7, %xmm4, %xmm4 # xmm4 = xmm4[0,1,2,3],xmm7[4,5,6,7]
    vpblendw    $204, %xmm6, %xmm4, %xmm4 # xmm4 = xmm4[0,1],xmm6[2,3],xmm4[4,5],xmm6[6,7]
    vpsrldq    $12, %xmm5, %xmm6       # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    vpsrlq    $32, %xmm5, %xmm7
    vpsrld    %xmm6, %xmm2, %xmm6
    vpsrld    %xmm7, %xmm2, %xmm7
    vpblendw    $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
    vpunpckhdq    %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
    vpmovzxdq    %xmm5, %xmm5    # xmm5 = xmm5[0],zero,xmm5[1],zero
    vpsrld    %xmm7, %xmm2, %xmm7
    vpsrld    %xmm5, %xmm2, %xmm2
    vpmovsxbd    (%ebx), %xmm5
    vmovdqu    %xmm3, -48(%edi)
    vmovdqu    %xmm4, -32(%edi)
    addl    $16, %ebx
    vpblendw    $240, %xmm7, %xmm2, %xmm2 # xmm2 = xmm2[0,1,2,3],xmm7[4,5,6,7]
    vpblendw    $204, %xmm6, %xmm2, %xmm2 # xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
    vmovdqu    %xmm2, -16(%edi)
    vpsrldq    $12, %xmm5, %xmm6       # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    vpsrlq    $32, %xmm5, %xmm7
    vpsrld    %xmm6, %xmm1, %xmm6
    vpsrld    %xmm7, %xmm1, %xmm7
    vpblendw    $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
    vpunpckhdq    %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
    vpmovzxdq    %xmm5, %xmm5    # xmm5 = xmm5[0],zero,xmm5[1],zero
    vpsrld    %xmm7, %xmm1, %xmm7
    vpsrld    %xmm5, %xmm1, %xmm1
    vpblendw    $240, %xmm7, %xmm1, %xmm1 # xmm1 = xmm1[0,1,2,3],xmm7[4,5,6,7]
    vpblendw    $204, %xmm6, %xmm1, %xmm1 # xmm1 = xmm1[0,1],xmm6[2,3],xmm1[4,5],xmm6[6,7]
    vmovdqu    %xmm1, (%edi)
    addl    $64, %edi
    addl    $-16, %ebp
    jne    LBB0_8
    movl    %esi, %edi
    cmpl    %eax, %esi
    je    LBB0_10

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20161024/0ede95a9/attachment.html>


More information about the llvm-bugs mailing list