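<!doctype html>
<html lang="en">

    <head>

      <meta charset="utf-8">

      <title>[Bug 45036] fast math flags on fcmp scalarizes on aarch64</title>

      <base href="https://bugs.llvm.org/">

    </head>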

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - fast math flags on fcmp scalarizes on aarch64"

   href="https://bugs.llvm.org/show_bug.cgi?id=45036">45036</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>fast math flags on fcmp scalarizes on aarch64

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: AArch64

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>andrew.b.adams@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>arnaud.degrandmaison@arm.com, llvm-bugs@lists.llvm.org, smithp352@googlemail.com, Ties.Stuij@arm.com

          </td>

        </tr></table>

        <div>

        <pre>Created <span class=""><a href="attachment.cgi?id=23176" name="attach_23176" title="ll that demonstrates the problem">attachment 23176</a> <a href="attachment.cgi?id=23176&amp;action=edit" title="ll that demonstrates the problem">[details]</a></span>

ll that demonstrates the problem

Any fast math flags on an fcmp seem to cause the aarch64 backend to emit a

sequence of scalar fcmp instructions instead of a single vector fcmgt

instruction.

The following ll shows the same comparison, with and without a fast math flag.

I turned them all on, but it seems that any fast math flag will do.

define &lt;4 x i1&gt; @good(&lt;4 x float&gt; %a, &lt;4 x float&gt; %b) {

  %c = fcmp olt &lt;4 x float&gt; %a, %b

  ret &lt;4 x i1&gt; %c

}

define &lt;4 x i1&gt; @bad(&lt;4 x float&gt; %a, &lt;4 x float&gt; %b) {

  %c = fcmp fast olt &lt;4 x float&gt; %a, %b

  ret &lt;4 x i1&gt; %c

}

Output from llc -march=aarch64 -filetype=asm fcmgt.ll  -o -

...

good:  

        fcmgt   v0.4s, v1.4s, v0.4s

        xtn     v0.4h, v0.4s

        ret

...

bad:

        mov     s2, v1.s[1]

        mov     s3, v0.s[1]

        fcmp    s3, s2

        csetm   w8, lt

        fcmp    s0, s1

        mov     s4, v1.s[2]

        mov     s2, v0.s[2]

        csetm   w9, lt

        mov     s3, v1.s[3]

        mov     s0, v0.s[3]

        fcmp    s2, s4

        fmov    s1, w9

        csetm   w9, lt

        fcmp    s0, s3

        mov     v1.s[1], w8

        mov     v1.s[2], w9

        csetm   w8, lt

        mov     v1.s[3], w8

        xtn     v0.4h, v1.4s

        ret

...</pre>

        </div>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>