<html>
    <head>
      <base href="http://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - Only one version of FMA3 instruction is being generated"
   href="http://llvm.org/bugs/show_bug.cgi?id=20043">20043</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Only one version of FMA3 instruction is being generated
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>-New Bugs
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>chris.a.ferguson@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvmbugs@cs.uiuc.edu
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Given the following code:


#include &lt;immintrin.h&gt;

__m128 fmatest(__m128 x)
{
    return _mm_fmadd_ps(x, _mm_set1_ps(2.0f), _mm_set1_ps(-1.0f)); 
}


I get the following output from Clang 3.4 (using -O3 -march=core-avx2):


.LCPI0_0:
    .long    3212836864              # float -1
.LCPI0_1:
    .long    1073741824              # float 2
fmatest(float __vector(4)):                           # @fmatest(float __vector(4))
    vbroadcastss    xmm2, dword ptr [rip + .LCPI0_0]
    vbroadcastss    xmm1, dword ptr [rip + .LCPI0_1]
    vfmadd213ps    xmm1, xmm0, xmm2
    vmovaps    xmm0, xmm1
    ret


The vmovaps would be unnecessary if an alternate fmadd instruction were used.
For instance this is what GCC 4.9 produces:


fmatest(float __vector):
    vmovaps    xmm1, XMMWORD PTR .LC1[rip]
    vfmadd132ps    xmm0, xmm1, XMMWORD PTR .LC0[rip]
    ret
.LC0:
    .long    1073741824
    .long    1073741824
    .long    1073741824
    .long    1073741824
.LC1:
    .long    3212836864
    .long    3212836864
    .long    3212836864
    .long    3212836864</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>