<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/104482">104482</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [X86] shufflevector codegen regression
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            backend:X86,
            missed-optimization
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          nikic
      </td>
    </tr>
</table>

<pre>
    Slightly reduced from https://github.com/rust-lang/rust/issues/129132.

https://llvm.godbolt.org/z/67c7YsWha

```llvm
; Reproducer: takes the upper eight bytes of %i, pairs each with a zero byte,
; and returns the resulting eight i16 lanes in reversed lane order.
;   %i   - <16 x i8> input; only elements 8..15 are selected by the masks below.
;   ret  - <8 x i16>; lane k is built from element (15 - k) of %i and a zero byte
;          (i.e. that byte zero-extended on a little-endian target such as x86).
define <8 x i16> @test(<16 x i8> %i) {
  ; Interleave %i[8..15] (mask indices 8..15) with elements 8..15 of the constant
  ; operand (mask indices 24..31), which are all zero. The poison elements 0..7
  ; of the constant are never selected.
  %i7 = shufflevector <16 x i8> %i, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; Reinterpret each (byte, 0) pair as a single i16 lane.
  %1 = bitcast <16 x i8> %i7 to <8 x i16>
  ; Swap the two groups of four lanes: lanes 4..7 move to 0..3 and vice versa.
  %i10 = shufflevector <8 x i16> %1, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ; Reverse lanes 0..3; lanes 4..7 pass through unchanged.
  %i11 = shufflevector <8 x i16> %i10, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ; Reverse lanes 4..7; lanes 0..3 pass through unchanged. Together with the two
  ; shuffles above this is a full reversal of the eight lanes of %1.
  %i12 = shufflevector <8 x i16> %i11, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i16> %i12
}
```

LLVM 16:
```asm
# NOTE(review): lines starting with "# NOTE(review)" or indented "# --" are
# annotations added for readers; they are not part of the compiler output.
# NOTE(review): this listing is 7 instructions before ret, uses only xmm0 and
# xmm1, and loads no constant from memory.
test:                                   # @test
        # -- xmm1 is zeroed here and is only read afterwards.
        pxor xmm1, xmm1
        punpckhbw       xmm0, xmm1              # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 pshufd  xmm0, xmm0, 78                  # xmm0 = xmm0[2,3,0,1]
 pshuflw xmm0, xmm0, 27                  # xmm0 = xmm0[3,2,1,0,4,5,6,7]
 pshufhw xmm0, xmm0, 27                  # xmm0 = xmm0[0,1,2,3,7,6,5,4]
        # -- Pack the words of xmm0 back to bytes, then interleave the low eight
        # -- bytes with the zero register xmm1 again.
        packuswb        xmm0, xmm0
 punpcklbw       xmm0, xmm1              # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ret
```
Main:
```asm
# NOTE(review): lines starting with "# NOTE(review)" or indented "# --" are
# annotations added for readers; they are not part of the compiler output.
# NOTE(review): this listing is 17 instructions before ret (the LLVM 16 listing
# has 7), uses xmm0..xmm3, and loads the 16-byte constant .LCPI0_0 (eight .short
# entries) that is applied with pand to both xmm1 and xmm0.
.LCPI0_0:
        .short  65535 # 0xffff
        .short  65535 # 0xffff
        .short  65535                           # 0xffff
 .short  65535                           # 0xffff
        .short  65535 # 0xffff
        .short  0 # 0x0
        .short  65535                           # 0xffff
 .short  0                               # 0x0
test: # @test
        # -- xmm2 is zeroed here and is only read afterwards.
        pxor    xmm2, xmm2
        punpckhbw xmm0, xmm2              # xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
        # -- xmm1 takes a copy of xmm0; from here xmm1 and xmm0 are unpacked,
        # -- shuffled and masked as two separate chains.
 movdqa  xmm1, xmm0
        punpckhbw       xmm1, xmm2 # xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
 pshufd  xmm1, xmm1, 198                 # xmm1 = xmm1[2,1,0,3]
 pshuflw xmm1, xmm1, 236                 # xmm1 = xmm1[0,3,2,3,4,5,6,7]
 pshufd  xmm1, xmm1, 147                 # xmm1 = xmm1[3,0,1,2]
 movdqa  xmm3, xmmword ptr [rip + .LCPI0_0] # xmm3 = [65535,65535,65535,65535,65535,0,65535,0]
        pand    xmm1, xmm3
 punpcklbw       xmm0, xmm2              # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
 pshufd  xmm0, xmm0, 198                 # xmm0 = xmm0[2,1,0,3]
 pshuflw xmm0, xmm0, 236                 # xmm0 = xmm0[0,3,2,3,4,5,6,7]
 pshufd  xmm0, xmm0, 156                 # xmm0 = xmm0[0,3,1,2]
 pshuflw xmm0, xmm0, 198                 # xmm0 = xmm0[2,1,0,3,4,5,6,7]
 pand    xmm0, xmm3
        # -- Merge the two masked registers into xmm1, then copy the result
        # -- into xmm0 before returning.
        packuswb        xmm1, xmm0
 movdqa  xmm0, xmm1
        ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0WNtu4zYQ_Rr6hYhBDnV98ENir4ECu0CBAr08FbrQNhtdXIlOsvv1BSmJFiWZSdzWCCJpRpwZHh6foZm0rThWnG-Q_4T83Sq5yFPdbCrxLLJVWuffN78U4niSxXfc8PyS8RwfmrrEJynPLWKPCPYI9kchT5d0ndUlgn1zaeVDkVTH_h7BXrTthbcI9hRiymCNyA6Rx-6_HakoXsr1sc7TupDrulExfiDYB2EW_tH-dkrGI1FAuj81qDPl_CAqjhHbRvgNCxog9gUjj0iu6ogQ29JAOSJtB18giDEKn7rRWJtCjNgOt6fL4VDwF57JusFLA7dTK9uKCJ9r0daV8v6_D-TuK_ti1c5gKJ4BjvRbDDB4w11sTP5wR4mxBcZGjS00NjA2E5gyYzORqUnGTGRqsjGqSr4uENXrkwqZJa1cWJkQy9pmwHh1KVle3jFfwKc9RCPrFf3BbiFnpmDqNtAYPK6TMxiYSU7LpB8pU3Qr8alC2Sw3ndXnnMykUPhYoZ9H1InWFNpgVrE3KrThcqkm6HUk3E0EZawyX7_--g2rQVPV6R61srBH_P4HATNS1NfVfc5vdYPfylLPU19t96U6Z8-n9LV_fitLMrw5z6C8ekX0a_5ThPwdgq0OO35SvtjyxZaPEss5etReanup7QXbC7aX2V5mez3b69le3_bqxw6ts6JgbsGjr2G0vBRToBS5FK_UIDoJW7zOwkL4sbAqJOiQXWj11VIcVYQNJ2lO96chfYphFmGfwtcpTZqBUkn2fGlfUzynFBkK0qwr7mSdTZ4Jd-wltHw2cWze2LSxWWOTxuaMTRnf8gWWL7B8oeUbrVXD5aIUfEtEdUsm1l-3P_9E_iTGP6C3bk91IzEOfJ_5GkvydjgcDv_-rduf2fi7B95VHunfIP9x9cQx6DpwottuVe6IDz3x4ZYwj74cMM95W5LBIcngkmRwSjI4JRmckgxOSQanJMNEksv6Jf87wePeRt7tbdQA2WNHB-yoAzvqwI46saNO7KgTO-rEjjqxo27sRu3sui-ALabxvJ8tATXuOWyxnVlhgQUfCkv6_jL0GVc7W6rem_ezpTTXRqxSLRGK9WFf6ybHZ9lg5D814owRPGEjtv5uiM50dKXxSl1Uwe9ciXU_b6BVPuEre79xfkAbbJreapzgaJzgaJzgaJzgaJzgaJzgaJxgN85bWzQHp2dbNDen7b3TbU7P9k6f47Rdvf-5NBNO36r-PlBuV39lLJkw1nB6timcqvbo-0eWf6zM90arfMPymMXJim9oCCz0Ikbi1WlD0ixP4zQnURoyIBClAfO9MPQ483hGg5XYAAGPRNSnkR9BvM4pjVmgXksZ86MQeYSXiSjW-tSobo4rfdC0ocTzIlgVScqLVp9sAaRJ9syrHLHH36MAgUINAZSibXn-UJ-lKMWPRAr1e1QtzqrZqKAP6eXYIo8UopXtNY0UstBHZiqWP_31m9U5P_IKN_zY8LYVdbW6NMXGcWSmD7C6y8O5qf_imX1q1s3nZQP_BAAA__8jiu6P">