<!doctype html>
<html lang="en">
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link bz_status_NEW"
   title="NEW - Better codegen for strided load"
   href="https://bugs.llvm.org/show_bug.cgi?id=48046">48046</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Better codegen for strided load
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Windows NT
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>david.bolvansky@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, pengfei.wang@intel.com, spatel+llvm@rotateright.com
          </td>
        </tr></table>
      <div>
        <pre>#define N 4

float x[4*N], y[N];

void foo (int p)
{
  int i;
  for (i = 0; i &lt; N; i++)
    y[i] = x[p + 3*i];
}

Clang -O3 -mavx2:
foo(int):                                # @foo(int)
        movsxd  rax, edi
        vmovss  xmm0, dword ptr [4*rax + x]     # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y], xmm0
        vmovss  xmm0, dword ptr [4*rax + x+12]  # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y+4], xmm0
        vmovss  xmm0, dword ptr [4*rax + x+24]  # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y+8], xmm0
        vmovss  xmm0, dword ptr [4*rax + x+36]  # xmm0 = mem[0],zero,zero,zero
        vmovss  dword ptr [rip + y+12], xmm0
        ret

ICC -O3 -mavx2:
foo(int):
        movsxd    rdi, edi                                      #6.1
        vmovss    xmm16, DWORD PTR [12+x+rdi*4]                 #10.12
        vmovss    xmm17, DWORD PTR [x+rdi*4]                    #10.12
        vinsertps xmm1, xmm16, DWORD PTR [36+x+rdi*4], 16       #10.12
        vinsertps xmm0, xmm17, DWORD PTR [24+x+rdi*4], 16       #10.12
        vunpcklps xmm2, xmm0, xmm1                              #10.12
        vmovups   XMMWORD PTR y[rip], xmm2                      #10.5
        ret                                                     #11.1
x:
y:

GCC  -O3 -mavx2:
foo(int):
        movsx   rdi, edi
        vmovss  xmm0, DWORD PTR x[0+rdi*4]
        vinsertps       xmm0, xmm0, DWORD PTR x[12+rdi*4], 0x10
        vmovlps QWORD PTR y[rip], xmm0
        vmovss  xmm0, DWORD PTR x[24+rdi*4]
        vinsertps       xmm0, xmm0, DWORD PTR x[36+rdi*4], 0x10
        vmovlps QWORD PTR y[rip+8], xmm0
        ret
y:

GCC has the best Block RThroughput value - 2.5.

<a href="https://godbolt.org/z/Pc1TWz">https://godbolt.org/z/Pc1TWz</a></pre>
      </div>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>