[llvm-bugs] [Bug 48046] New: Better codegen for strided load

via llvm-bugs llvm-bugs at lists.llvm.org
Mon Nov 2 05:18:44 PST 2020


https://bugs.llvm.org/show_bug.cgi?id=48046

            Bug ID: 48046
           Summary: Better codegen for strided load
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
                    llvm-dev at redking.me.uk, pengfei.wang at intel.com,
                    spatel+llvm at rotateright.com

/* Number of elements written to y by one call to foo. */
#define N 4

/* Globals: x is the source array (4*N floats), y the destination (N floats). */
float x[4*N], y[N];

/*
 * Copy N floats from x into y, reading x with a stride of 3 elements
 * starting at index p: y[i] = x[p + 3*i]. With N = 4 the elements read are
 * x[p], x[p+3], x[p+6] and x[p+9].
 *
 * No bounds check is performed; with the array sizes above the reads stay
 * inside x only for 0 <= p <= 6.
 */
void foo (int p)
{
  int i;
  for (i = 0; i < N; i++)
    y[i] = x[p + 3*i];
}

Clang -O3 -mavx2:
# Annotation (not compiler output): fully scalar code. Each of the four
# elements is loaded with its own vmovss and written to y with its own
# vmovss, i.e. 4 loads and 4 stores; nothing is combined into a vector.
foo(int):                                # @foo(int)
        movsxd  rax, edi
        # rax = sign-extended p. The loads below read x + 4*rax + {0,12,24,36},
        # i.e. x[p], x[p+3], x[p+6], x[p+9]; the stores write y[0]..y[3].
        vmovss  xmm0, dword ptr [4*rax + x]     # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y], xmm0
        vmovss  xmm0, dword ptr [4*rax + x+12]  # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y+4], xmm0
        vmovss  xmm0, dword ptr [4*rax + x+24]  # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y+8], xmm0
        vmovss  xmm0, dword ptr [4*rax + x+36]  # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y+12], xmm0
        ret

ICC -O3 -mavx2:
# Annotation (not compiler output): the four elements are assembled in
# registers and written to y with a single 16-byte store.
# NOTE(review): xmm16/xmm17 can only be encoded with EVEX (AVX-512VL), which
# goes beyond the "-mavx2" in the heading -- confirm the actual flags against
# the Godbolt link.
foo(int):
        movsxd    rdi, edi                                      #6.1
        # rdi = sign-extended p; all loads below are relative to x + 4*rdi.
        vmovss    xmm16, DWORD PTR [12+x+rdi*4]                 #10.12
        vmovss    xmm17, DWORD PTR [x+rdi*4]                    #10.12
        # Immediate 16 (0x10) inserts the loaded float into element 1:
        #   xmm1 = { x[p+3], x[p+9], 0, 0 },  xmm0 = { x[p], x[p+6], 0, 0 }
        vinsertps xmm1, xmm16, DWORD PTR [36+x+rdi*4], 16       #10.12
        vinsertps xmm0, xmm17, DWORD PTR [24+x+rdi*4], 16       #10.12
        # Interleave the low halves: xmm2 = { x[p], x[p+3], x[p+6], x[p+9] }.
        vunpcklps xmm2, xmm0, xmm1                              #10.12
        vmovups   XMMWORD PTR y[rip], xmm2                      #10.5
        ret                                                     #11.1
# Labels named after the globals x and y; no data directives appear in this
# listing.
x:
y:

GCC -O3 -mavx2:
# Annotation (not compiler output): the elements are handled as two pairs.
# Each pair is built with vmovss + vinsertps and written with one 64-bit
# vmovlps store, i.e. 4 loads and 2 stores.
foo(int):
        movsx   rdi, edi
        # rdi = sign-extended p; all loads below are relative to x + 4*rdi.
        vmovss  xmm0, DWORD PTR x[0+rdi*4]
        # Immediate 0x10 inserts the loaded float into element 1:
        #   xmm0 = { x[p], x[p+3], 0, 0 }
        vinsertps       xmm0, xmm0, DWORD PTR x[12+rdi*4], 0x10
        vmovlps QWORD PTR y[rip], xmm0
        # Second pair: xmm0 = { x[p+6], x[p+9], 0, 0 }, stored to y[2..3].
        vmovss  xmm0, DWORD PTR x[24+rdi*4]
        vinsertps       xmm0, xmm0, DWORD PTR x[36+rdi*4], 0x10
        vmovlps QWORD PTR y[rip+8], xmm0
        ret
# Label named after the global y; no data directives appear in this listing.
y:

Of the three outputs, GCC's has the best (lowest) Block RThroughput value: 2.5.

https://godbolt.org/z/Pc1TWz

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20201102/9b9e472d/attachment.html>


More information about the llvm-bugs mailing list