<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Suboptimal multiplications in vectorized code"
   href="https://bugs.llvm.org/show_bug.cgi?id=52039">52039</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Suboptimal multiplications in vectorized code
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Windows NT
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Scalar Optimizations
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>david.bolvansky@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
        <div>
        <pre>int a [128];
int b[128] = {0};

void foo (void)
{
  int k;

  for(k=0; k&lt;16; k++)
  {
    b[k] = 10 - b[k];
    a[k] = b[k] * 3;
  }
}

LLVM emits vpmulld ymm1, ymm1, ymm2; ICC prefers multiple vpaddd.

ICC block RThroughput: 8.0 vs 8.7 (LLVM).

<a href="https://godbolt.org/z/TYro7xcfn">https://godbolt.org/z/TYro7xcfn</a>


If you change a[k] = b[k] * 3; to a[k] = b[k] * 2;

LLVM:
foo:                                    # @foo
        movdqa  xmm1, xmmword ptr [rip + b]
        movdqa  xmm0, xmmword ptr [rip + .LCPI0_0] # xmm0 = [10,10,10,10]
        movdqa  xmm2, xmm0
        psubd   xmm2, xmm1
        paddd   xmm1, xmm1
        movdqa  xmmword ptr [rip + b], xmm2
        movdqa  xmm2, xmmword ptr [rip + .LCPI0_1] # xmm2 = [20,20,20,20]
        movdqa  xmm3, xmm2
        psubd   xmm3, xmm1
        movdqa  xmmword ptr [rip + a], xmm3
        movdqa  xmm1, xmmword ptr [rip + b+16]
        movdqa  xmm3, xmm0
        psubd   xmm3, xmm1
        paddd   xmm1, xmm1
        movdqa  xmmword ptr [rip + b+16], xmm3
        movdqa  xmm3, xmm2
        psubd   xmm3, xmm1
        movdqa  xmmword ptr [rip + a+16], xmm3
        movdqa  xmm1, xmmword ptr [rip + b+32]
        movdqa  xmm3, xmm0
        psubd   xmm3, xmm1
        paddd   xmm1, xmm1
        movdqa  xmmword ptr [rip + b+32], xmm3
        movdqa  xmm3, xmm2
        psubd   xmm3, xmm1
        movdqa  xmmword ptr [rip + a+32], xmm3
        movdqa  xmm1, xmmword ptr [rip + b+48]
        psubd   xmm0, xmm1
        paddd   xmm1, xmm1
        movdqa  xmmword ptr [rip + b+48], xmm0
        psubd   xmm2, xmm1
        movdqa  xmmword ptr [rip + a+48], xmm2
        ret

vs ICC's:
foo:
        movdqu    xmm3, XMMWORD PTR .L_2il0floatpacket.0[rip]   #11.12
        movdqa    xmm0, xmm3                                    #11.17
        movdqa    xmm1, xmm3                                    #11.17
        movdqa    xmm2, xmm3                                    #11.17
        psubd     xmm0, XMMWORD PTR b[rip]                      #11.17
        psubd     xmm1, XMMWORD PTR 16+b[rip]                   #11.17
        psubd     xmm2, XMMWORD PTR 32+b[rip]                   #11.17
        psubd     xmm3, XMMWORD PTR 48+b[rip]                   #11.17
        movdqu    XMMWORD PTR b[rip], xmm0                      #11.5
        paddd     xmm0, xmm0                                    #12.19
        movdqu    XMMWORD PTR 16+b[rip], xmm1                   #11.5
        paddd     xmm1, xmm1                                    #12.19
        movdqu    XMMWORD PTR 32+b[rip], xmm2                   #11.5
        paddd     xmm2, xmm2                                    #12.19
        movdqu    XMMWORD PTR 48+b[rip], xmm3                   #11.5
        paddd     xmm3, xmm3                                    #12.19
        movdqu    XMMWORD PTR a[rip], xmm0                      #12.5
        movdqu    XMMWORD PTR 16+a[rip], xmm1                   #12.5
        movdqu    XMMWORD PTR 32+a[rip], xmm2                   #12.5
        movdqu    XMMWORD PTR 48+a[rip], xmm3                   #12.5
        ret  


Maybe missing OneUse check somewhere?</pre>
        </div>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>