<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - [aarch64] Inefficient code in __builtin_mul_overflow with unsigned &lt; 32 bits"

   href="https://bugs.llvm.org/show_bug.cgi?id=48292">48292</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>[aarch64] Inefficient code in __builtin_mul_overflow with unsigned &lt; 32 bits

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>10.0

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>Other

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: AArch64

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>husseydevin@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>arnaud.degrandmaison@arm.com, llvm-bugs@lists.llvm.org, smithp352@googlemail.com, Ties.Stuij@arm.com

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Given the following C code:

bool umul16_overflow(uint16_t a, uint16_t b, uint16_t *result)

{

    return __builtin_mul_overflow(a,b,result);

}

aarch64 clang 10.0.1 -O2:

umul16_overflow:

        // Cast args

        and     w9, w1, #0xffff

        and     w10, w0, #0xffff

        // 16-bit to 32-bit long multiply

        mul     w9, w10, w9

        // Store upper 16 bits in w10

        lsr     w10, w9, #16

        // Store zero in x8??????????

        mov     x8, xzr

        // Check if upper 16 bits (w10) are non-zero

        cmp     w10, #0

        cset    w10, ne

        // Check if zero (x8) is non-zero??????????

        // w8 = 0

        cmp     x8, #0

        cset    w8, ne

        // Or the result against zero??????????

        orr     w0, w10, w8

        // Store

        strh    w9, [x2]

        ret

I have no idea what x8 is being used for here, it will always be zero. This is

the code without the redundant instructions:

umul16_overflow:

        // Cast args

        and     w8, w1, #0xffff

        and     w9, w0, #0xffff

        // 16-bit to 32-bit long multiply

        mul     w8, w9, w8

        // Store upper 16 bits in w9

        lsr     w9, w8, #16

        // Check if the upper 16 bits (w9) are non-zero

        cmp     w9, #0

        cset    w0, ne

        // Store

        strh    w8, [x2]

        ret

Note that we could simplify this even further by using cmp more creatively, but

this appears to be a different issue.

umul16_overflow:

        // Cast args

        and     w8, w1, #0xffff

        and     w9, w0, #0xffff

        // 16-bit to 32-bit long multiply

        mul     w8, w9, w8

        // Check if the upper 16 bits are non-zero

        cmp     wzr, w8, lsr #16

        cset    w0, ne

        // Store

        strh    w8, [x2]

        ret</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>