<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - USRA is replaced with USHR+ORR which results in poor codegen"
   href="https://bugs.llvm.org/show_bug.cgi?id=49577">49577</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>USRA is replaced with USHR+ORR which results in poor codegen
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: AArch64
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>kutdanila@yandex.ru
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>arnaud.degrandmaison@arm.com, llvm-bugs@lists.llvm.org, smithp352@googlemail.com, Ties.Stuij@arm.com
          </td>
        </tr></table>
      <p>
        <div>
        <pre>int MoveMask(uint8x16_t input)
{
    uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7));
    uint32x4_t paired16 =
        vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7));
    uint64x2_t paired32 =
        vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
    uint8x16_t paired64 =
        vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
    return vgetq_lane_u8(paired64, 0) | ((int) vgetq_lane_u8(paired64, 8) &lt;&lt; 8);
}

For vsraq_n_u16 and vsraq_n_u32, LLVM generates USHR+ORR instead of the single
USRA instruction that GCC emits:

<a href="https://gcc.godbolt.org/z/MxP63x">https://gcc.godbolt.org/z/MxP63x</a>

Also, in the Match function there are two redundant ANDs with 0xff:

Match(unsigned char):                              // @Match(unsigned char)
        adrp    x8, ctrl
        ldr     q0, [x8, :lo12:ctrl]
        dup     v1.16b, w0
        cmeq    v0.16b, v1.16b, v0.16b
        movi    v1.16b, #1
        and     v0.16b, v0.16b, v1.16b
        ushr    v1.8h, v0.8h, #7
        orr     v0.16b, v1.16b, v0.16b
        ushr    v1.4s, v0.4s, #14
        orr     v0.16b, v1.16b, v0.16b
        usra    v0.2d, v0.2d, #28
        umov    w8, v0.b[0]
        umov    w9, v0.b[8]
        and     x0, x8, #0xff // Not needed?
        and     x8, x9, #0xff // Not needed?
        bfi     x0, x8, #8, #8
        ret</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>