[llvm-bugs] [Bug 47283] New: Suboptimal x86 vector extend patterns

via llvm-bugs llvm-bugs at lists.llvm.org
Sat Aug 22 05:22:06 PDT 2020


https://bugs.llvm.org/show_bug.cgi?id=47283

            Bug ID: 47283
           Summary: Suboptimal x86 vector extend patterns
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
                    llvm-dev at redking.me.uk, spatel+llvm at rotateright.com

/* 32-byte GCC-style vector of unsigned char lanes. */
typedef unsigned char v32qi __attribute__((vector_size (32)));
/* 32-byte GCC-style vector of unsigned long long lanes
   (the demangled signatures in the listings show it as __vector(4)). */
typedef unsigned long long v4di __attribute__((vector_size (32)));

/* Reproducer: widen the first four unsigned-char lanes of `src` to
   unsigned long long and store them to dst[0] as a single v4di.
   The copy goes through a scalar temporary array on purpose; the report
   is about the vector code the compilers generate for this pattern. */
void
bar_u8_u64 (v4di * dst, v32qi src)
{
  /* Scalar staging buffer for the four widened lanes. */
  unsigned long long tem[4];

  /* Lanes 0..3 only; unsigned char -> unsigned long long is a zero-extension. */
  for (int i = 0; i < 4; i++)
    tem[i] = src[i];

  /* Reinterpret the four scalars as one v4di and store it through dst. */
  dst[0] = *(v4di *) tem;
}


Clang -O3:
# Clang -O3 output for bar_u8_u64 (Intel syntax, '#' comments, SSE2 only).
# In:    rdi = dst; src is read from the stack at [rbp + 16], i.e. the 16
#        bytes directly above the saved rbp and the return address.
# Out:   32 bytes written to [rdi] .. [rdi + 31] with aligned stores.
# Clobb: xmm0, xmm1, xmm2 (rbp and rsp are restored before ret).
# Note:  the shuffle-mask comments below were split across lines by mail
#        wrapping in the archived message; they are rejoined here so each
#        mask sits on the instruction it describes.
bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32)): # @bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32))
        # Frame setup: keep rbp as frame pointer, realign rsp to 32 bytes,
        # reserve 64 bytes of locals.
        push    rbp
        mov     rbp, rsp
        and     rsp, -32
        sub     rsp, 64
        # Load the low 16 bytes of the stack-passed vector; xmm1 = all zeros.
        movdqa  xmm0, xmmword ptr [rbp + 16]
        pxor    xmm1, xmm1
        # Zero-extend by interleaving with zero: u8 -> u16 -> u32.
        punpcklbw       xmm0, xmm1              # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
        punpcklwd       xmm0, xmm1              # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
        # Low pair: dwords 0,1 -> qwords 0,1 (kept in xmm2).
        movdqa  xmm2, xmm0
        punpckldq       xmm2, xmm1              # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
        # Stack copy of the low pair; nothing in this listing reloads it
        # (presumably the leftover `tem` array from the C source).
        movdqa  xmmword ptr [rsp + 16], xmm2
        # High pair: dwords 2,3 -> qwords 2,3 (in xmm0).
        punpckhdq       xmm0, xmm1              # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
        # Stack copy of the high pair; likewise never reloaded here.
        movdqa  xmmword ptr [rsp + 32], xmm0
        # Write both halves of the result through dst.
        movdqa  xmmword ptr [rdi + 16], xmm0
        movdqa  xmmword ptr [rdi], xmm2
        # Tear down the realigned frame.
        mov     rsp, rbp
        pop     rbp
        ret

Dispatch Width:    6
uOps Per Cycle:    1.59
IPC:               1.06
Block RThroughput: 5.0

ICC:
# ICC output for bar_u8_u64 (Intel syntax, '#' comments, SSE2 only).
# In:    rdi = dst; src is read from the stack at [8+rsp], i.e. directly
#        above the return address (no frame is set up).
# Out:   32 bytes written to [rdi] .. [rdi + 31] with unaligned stores.
# Clobb: xmm0, xmm1, xmm2.
# The trailing #10.18 / #12.3 tags appear to be line.column references into
# the C reproducer (line 10 = `tem[i] = src[i]`, line 12 = the dst[0] store)
# -- confirm against the linked Compiler Explorer session.
bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32)):
        # Load only the 4 source bytes that are needed; xmm0 = all zeros.
        movd      xmm2, DWORD PTR [8+rsp]                       #10.18
        pxor      xmm0, xmm0                                    #10.18
        # xmm1 keeps bytes 0,1 at the bottom; shifting xmm2 right by two
        # bytes brings bytes 2,3 to the bottom of xmm2.
        movdqa    xmm1, xmm2                                    #10.18
        psrldq    xmm2, 2                                       #10.18
        # Three interleaves with zero per register widen the low lanes
        # u8 -> u16 -> u32 -> u64, leaving two 64-bit results in each register.
        punpcklbw xmm1, xmm0                                    #10.18
        punpcklbw xmm2, xmm0                                    #10.18
        punpcklwd xmm1, xmm0                                    #10.18
        punpcklwd xmm2, xmm0                                    #10.18
        punpckldq xmm1, xmm0                                    #10.18
        punpckldq xmm2, xmm0                                    #10.18
        # Store elements 0,1 then 2,3 through dst.
        movdqu    XMMWORD PTR [rdi], xmm1                       #12.3
        movdqu    XMMWORD PTR [16+rdi], xmm2                    #12.3
        ret

Dispatch Width:    6
uOps Per Cycle:    2.40
IPC:               1.83
Block RThroughput: 7.0


https://godbolt.org/z/1xP3j8

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200822/678e13f2/attachment.html>


More information about the llvm-bugs mailing list