<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Missed vectorization in SPEC benchmark - emulated gather capability?"
   href="https://bugs.llvm.org/show_bug.cgi?id=52358">52358</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Missed vectorization in SPEC benchmark - emulated gather capability?
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Loop Optimizer
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>david.bolvansky@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Extracted from 450.soplex:

typedef double Real;

struct Element {
    Real val;
    int idx;
};

struct Vector {
    int dimen;
    Real* val;
};

Real foo(Element* e, int n, const Vector& w) {
    Real x = 0;

    while (n--) {
        x += e->val * w.val[e->idx];
        e++;
    }
    return x;
}

Flags: -Ofast -mavx2

Godbolt: <a href="https://godbolt.org/z/7f9nsf98f">https://godbolt.org/z/7f9nsf98f</a>


LLVM just unrolls this loop:
.LBB0_7:                                # =>This Inner Loop Header: Depth=1
        movsxd  rcx, dword ptr [rdi + 8]
        vmovsd  xmm1, qword ptr [rax + 8*rcx]   # xmm1 = mem[0],zero
        vmulsd  xmm1, xmm1, qword ptr [rdi]
        vaddsd  xmm0, xmm1, xmm0
        movsxd  rcx, dword ptr [rdi + 24]
        vmovsd  xmm1, qword ptr [rax + 8*rcx]   # xmm1 = mem[0],zero
        vmulsd  xmm1, xmm1, qword ptr [rdi + 16]
        movsxd  rcx, dword ptr [rdi + 40]
        vmovsd  xmm2, qword ptr [rax + 8*rcx]   # xmm2 = mem[0],zero
        vmulsd  xmm2, xmm2, qword ptr [rdi + 32]
        vaddsd  xmm1, xmm1, xmm2
        movsxd  rcx, dword ptr [rdi + 56]
        vmovsd  xmm2, qword ptr [rax + 8*rcx]   # xmm2 = mem[0],zero
        vmulsd  xmm2, xmm2, qword ptr [rdi + 48]
        vaddsd  xmm0, xmm0, xmm1
        add     rdi, 64
        vaddsd  xmm0, xmm2, xmm0
        add     esi, -4
        jne     .LBB0_7


Newer GCC versions have been able to vectorize it since this commit:

    Add emulated gather capability to the vectorizer

    This adds a gather vectorization capability to the vectorizer
    without target support by decomposing the offset vector, doing
    scalar loads and then building a vector from the result.  This
    is aimed mainly at cases where vectorizing the rest of the loop
    offsets the cost of vectorizing the gather.</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>