<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Optimize lerp with two FMAs"
   href="https://bugs.llvm.org/show_bug.cgi?id=42716">42716</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Optimize lerp with two FMAs
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Scalar Optimizations
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>david.bolvansky@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Tip from: <a href="https://devblogs.nvidia.com/lerp-faster-cuda/">https://devblogs.nvidia.com/lerp-faster-cuda/</a>

float lerp(float a, float b, float c) {
    return (1-c)*a +c*b;
}


float lerp2(float a, float b, float c)
{
    return a + c * (b - a);
}

float rf2(float a, float b, float c) {
    return std::fma(c, b, std::fma(-c, a, a));
}

Clang trunk -Ofast -std=c++17 -mfma


lerp(float, float, float):                             # @lerp(float, float, float)
        vmovss  xmm3, dword ptr [rip + .LCPI0_0] # xmm3 = mem[0],zero,zero,zero
        vsubss  xmm3, xmm3, xmm2
        vmulss  xmm1, xmm2, xmm1
        vfmadd213ss     xmm0, xmm3, xmm1 # xmm0 = (xmm3 * xmm0) + xmm1
        ret
lerp2(float, float, float):                            # @lerp2(float, float, float)
        vsubss  xmm1, xmm1, xmm0
        vfmadd231ss     xmm0, xmm2, xmm1 # xmm0 = (xmm2 * xmm1) + xmm0
        ret
rf2(float, float, float):                              # @rf2(float, float, float)
        vfnmadd213ss    xmm0, xmm2, xmm0 # xmm0 = -(xmm2 * xmm0) + xmm0
        vfmadd231ss     xmm0, xmm2, xmm1 # xmm0 = (xmm2 * xmm1) + xmm0
        ret


InstCombine?

A) Missing fold: (1-c)*a +c*b -> a + c * (b - a) 
B) Fold a + c * (b - a) to llvm.fma(c, b, llvm.fma(-c, a, a)) if FMA is enabled</pre>
        </div>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>