<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/119093>119093</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [AArch64] Failure to use `bfi` to combine registers
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            backend:AArch64,
            missed-optimization
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          Kmeakin
      </td>
    </tr>
</table>

<pre>
    ```cpp
#include <array>
#include <cstdint>

// Short spellings for the fixed-width unsigned integer types used below.
using u8 = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Every function below returns its arguments as a std::array, in argument
// order (the first argument becomes element 0). The `_to_uN` suffix names the
// total width of the packed result; the return type itself is the array.

// Two bytes -> std::array<u8, 2> (16 bits in total).
std::array<u8, 2> u8x2_to_u16(u8 b0, u8 b1) {
    return std::array<u8, 2>{b0, b1};
}

// Two half-words -> std::array<u16, 2> (32 bits in total).
std::array<u16, 2> u16x2_to_u32(u16 h0, u16 h1) {
    return std::array<u16, 2>{h0, h1};
}

// Two words -> std::array<u32, 2> (64 bits in total).
std::array<u32, 2> u32x2_to_u64(u32 w0, u32 w1) {
    return std::array<u32, 2>{w0, w1};
}

// Four bytes -> std::array<u8, 4> (32 bits in total).
std::array<u8, 4> u8x4_to_u32(u8 b0, u8 b1, u8 b2, u8 b3) {
    return std::array<u8, 4>{b0, b1, b2, b3};
}

// Four half-words -> std::array<u16, 4> (64 bits in total).
std::array<u16, 4> u16x4_to_u64(u16 h0, u16 h1, u16 h2, u16 h3) {
    return std::array<u16, 4>{h0, h1, h2, h3};
}

// Eight bytes -> std::array<u8, 8> (64 bits in total).
std::array<u8, 8> u8x8_to_u64(u8 b0, u8 b1, u8 b2, u8 b3,
                              u8 b4, u8 b5, u8 b6, u8 b7) {
    return std::array<u8, 8>{b0, b1, b2, b3, b4, b5, b6, b7};
}

```

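LLVM uses a single `bfi` in `u8x2_to_u16` and `u16x2_to_u32`, but in the other functions it emits longer `and`/`lsl`/`orr` sequences, using `bfi` only for some of the fields (and not at all in `u32x2_to_u64`):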

```asm
u8x2_to_u16(unsigned char, unsigned char):
        bfi     w0, w1, #8, #24
        ret

u16x2_to_u32(unsigned short, unsigned short):
        bfi     w0, w1, #16, #16
        ret

u32x2_to_u64(unsigned int, unsigned int):
        mov     w8, w0
        orr     x0, x8, x1, lsl #32
        ret

u8x4_to_u32(unsigned char, unsigned char, unsigned char, unsigned char):
        and     w8, w2, #0xff
        lsl     w8, w8, #16
        orr     w8, w8, w3, lsl #24
        bfi     w8, w1, #8, #8
        bfxil   w8, w0, #0, #8
        mov     w0, w8
        ret

u16x4_to_u64(unsigned short, unsigned short, unsigned short, unsigned short):
        and     w8, w2, #0xffff
        lsl     x8, x8, #32
        orr     x8, x8, x3, lsl #48
        bfi     x8, x1, #16, #16
        bfxil   x8, x0, #0, #16
        mov     x0, x8
        ret

u8x8_to_u64(unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char):
        and     w8, w6, #0xff
        and     w9, w5, #0xff
        lsl     x8, x8, #48
        orr     x8, x8, x7, lsl #56
        orr     x8, x8, x9, lsl #40
        and     w9, w4, #0xff
        orr     x8, x8, x9, lsl #32
        and     x9, x0, #0xff
        bfi     x8, x3, #24, #8
        bfi     x8, x2, #16, #8
        bfi     x8, x1, #8, #8
        orr     x0, x8, x9
        ret
```

The optimal code would be:
```asm
u8x2_to_u16(unsigned char, unsigned char):
        bfi     w0, w1, #8, #24
        ret

u16x2_to_u32(unsigned short, unsigned short):
        bfi     w0, w1, #16, #16
        ret

u32x2_to_u64(unsigned int, unsigned int):
        bfi     x0, x1, #32, #32
        ret

u8x4_to_u32(unsigned char, unsigned char, unsigned char, unsigned char):
        bfi     w0, w1, #8, #8
        bfi     w0, w2, #16, #8
        bfi     w0, w3, #24, #8
        ret

u16x4_to_u64(unsigned short, unsigned short, unsigned short, unsigned short):
        bfi     x0, x1, #16, #16
        bfi     x0, x2, #32, #16
        bfi     x0, x3, #48, #16
        ret

u8x8_to_u64(unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char):
        bfi     x0, x1, #8, #8
        bfi     x0, x2, #16, #8
        bfi     x0, x3, #24, #8
        bfi     x0, x4, #32, #8
        bfi     x0, x5, #40, #8
        bfi     x0, x6, #48, #8
        bfi     x0, x7, #56, #8
        ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMWE1v6ygU_TVkg1rhiz8XXqTty2be7EazrfxBYuY5dmVwmze_foTBCTjE6eiNqomqGMMx53Lu4VKnEIIfOsZyFD2h6GVTjLLph_y3Iyt-8G5T9vXPHMVE_1WIbBFQ3lXtWDOM6HMxDMVPRL9dDVRC1ryTZohsR8G7Ax5TjOgLFrJGdIvoduSdTF8lok8XSBBfY4J4AaJwDaKwAMXhNSgOZxAi22KUPR7TE7zK_nUMYgTpmOKSIHhWoZYBggw_IPrtModZ8POYKhCoMZQ84YHJcehUUz9dBih5QfQJq8tMFMSGiYJiCmLcaCrVWuNSkXnJ9PONj4yCIYtDRUYBf2gy1VojU8F5yfTzHy6ZpWJorW2hom7A3KD3dA0NOyJbjLFPXfU9zVdSHY-KxY0oiE1IWoEruU0Lzq21sHQKbsZ1ToT6nmZsbsaVnlIrrLtK6UY4N6K5Ec-NBEFmAlrTNP1XmqrviVMTarYyuV7TuTjo2-_f__wdj4IJjGJS7jmKCeadurH3WUxw0dVTr70pYh3JKHHXS_WYbBjuZcMGvB-7SvK-E2ppLnEhjmrPu9u4m8pajaumGCah3I5MT4PNp9zz6Xq2ODxjBDQ1Vwgt7MCkKWmL_TwziKYfpMNpej5Hqp2mG2Rr0S129Dy5qrI22XS_oDr275pqWtGHyhXuh2HqO030p2nkNAXRilbxU_Cu2t3od3S-37GIVNnCihSMFuS031soFaGFSh3FzGden435oNb6dFbPWUi9qU-dhJ14a6toYvNAleAmr-kt74SeZN70zmd6Pi-lV0xjgXnlTvrPZrEwJ1vMMPU42zbVtbMXqhrwUlUHO_v47Flng7iF9Zed-V90zClZ5CL22_qMyiZUtG7-Rb6cDHjzlVj5inxbxUFndnbJ7ShDf5R359T-mifTQ5fsu5MtDEUvddm3S_mMg6XxPmFS38b3lsrMMp97Bv6hTq03yY9Fi6u-ZvijH9sal8ycXV99cH3xiWU-v3ZwnbNDnOxQ8FWnrzqc7qidOucJcaruqgUN1m_rLz0zbsh-q3I7YFjmSIMXKHopV6um-R8X83Wt_P858KVK6zWJfK7OWdhwqf4Kdj5YQt-_LgtsvMxX6klqYsYi36IWNXJT57TOaFZsWB4klMaEApBNkwONSLSPkzII9yzMEpalZVEADWoIsrgkG54DgTAAkgCBhJBHCKIySIo6DKs0BVahkLBjwdvHtn0_PvbDYcOFGFkeBBnJ6KYtStaK6ZcOgLKofrBOvSltt0PVKJsp5RDAkQvB6oepgvO_C_XWocail82Qq4kfyvEgUEhaLqS4UEku2-lnlHm-6AXvCt6OA8PqtU8w65VI9rjqjyXvGB7YgQvJBrEZhzZvpHyb3nFgh2B34LIZy8eqPyLYKSZzeXgb-r9YJRHsphUKBDuzyPcc_gkAAP__f_PAVg">