<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Suboptimal code for float/double abs pattern with -Ofast"
   href="https://bugs.llvm.org/show_bug.cgi?id=42179">42179</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Suboptimal code for float/double abs pattern with -Ofast
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Scalar Optimizations
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>david.bolvansky@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Compiled with flags -Ofast -march=haswell

Pretty common ABS code:

float absfast(float value)
    {
    if (value >=  0.0f)
        return value;
    else
        return -value;
}

Instead of 
absfast:
        vandps  xmm0, xmm0, XMMWORD PTR .LC0[rip]
        ret

We have
absfast:                                # @absfast
        vbroadcastss    xmm1, dword ptr [rip + .LCPI0_0] # xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
        vxorps  xmm1, xmm0, xmm1
        vxorps  xmm2, xmm2, xmm2
        vcmpltss        xmm2, xmm0, xmm2
        vblendvps       xmm0, xmm0, xmm1, xmm2
        ret

Which is very slow; see the abs8 row in the benchmark results below.


test   description   absolute   operations   ratio with
number               time       per second   test0

 0    "float fabs"   0.00 sec   24729.52 M     1.00
 1   "float fabsf"   0.00 sec   24798.51 M     1.00
 2 "float std abs"   0.00 sec   24744.82 M     1.00
 3    "float abs1"   0.00 sec   24806.20 M     1.00
 4    "float abs2"   0.00 sec   24744.82 M     1.00
 5    "float abs6"   0.00 sec   24867.89 M     0.99
 6    "float abs7"   0.01 sec    8431.70 M     2.93
 7    "float abs8"   0.21 sec     373.54 M    66.20
 8    "float abs9"   0.00 sec   24737.17 M     1.00

Total absolute time for float absolute value: 0.25 sec</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>