<!doctype html>
<html lang="en">
    <head>
      <meta charset="utf-8">
      <title>Bug 46599 - Commutable subtractions</title>
      <!-- Relative URLs in this document resolve against the LLVM Bugzilla host. -->
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Commutable subtractions"
   href="https://bugs.llvm.org/show_bug.cgi?id=46599">46599</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Commutable subtractions
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Windows NT
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Common Code Generator Code
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>llvm-dev@redking.me.uk
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>craig.topper@gmail.com, efriedma@quicinc.com, lebedev.ri@gmail.com, llvm-bugs@lists.llvm.org, spatel+llvm@rotateright.com
          </td>
        </tr></table>
      <p>
        <div>
        <pre>If a subtraction result is only ever used for certain instructions (f)abs,
(f)mul etc. we might be able to tag the subtraction as "commutable".

This is particularly interesting for SSE memory folding (either direct or via
folding stack operations):
<a href="https://godbolt.org/z/zkELgP">https://godbolt.org/z/zkELgP</a>

#include <x86intrin.h>

auto dp_offset(__m128 *x, __m128 *y, __m128 z, __m128 w) {
    __m128 xx = _mm_sub_ps(*x, z);
    __m128 yy = _mm_sub_ps(*y, w);
    return _mm_add_ps(
        _mm_mul_ps(xx, xx),
        _mm_mul_ps(yy, yy)
    );
}
auto dp_offset_commute(__m128 *x, __m128 *y, __m128 z, __m128 w) {
    __m128 xx = _mm_sub_ps(z, *x);
    __m128 yy = _mm_sub_ps(w, *y);
    return _mm_add_ps(
        _mm_mul_ps(xx, xx),
        _mm_mul_ps(yy, yy)
    );
}

clang -g0 -O3 -march=btver2

dp_offset:
  vmovaps (%rdi), %xmm2
  vmovaps (%rsi), %xmm3
  vsubps %xmm0, %xmm2, %xmm0
  vsubps %xmm1, %xmm3, %xmm1
  vmulps %xmm0, %xmm0, %xmm0
  vmulps %xmm1, %xmm1, %xmm1
  vaddps %xmm1, %xmm0, %xmm0
  retq
dp_offset_commute:
  vsubps (%rdi), %xmm0, %xmm0
  vsubps (%rsi), %xmm1, %xmm1
  vmulps %xmm0, %xmm0, %xmm0
  vmulps %xmm1, %xmm1, %xmm1
  vaddps %xmm1, %xmm0, %xmm0
  retq</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>