<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/161980>161980</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [X86] Suboptimal reusable vpshufb
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          dzaima
      </td>
    </tr>
</table>

<pre>
    This code:

```c
#include<stdint.h>
#include<immintrin.h>

// Reproducer: separates the 16-bit elements of a and b by index parity,
// independently within each 128-bit lane.
//   dst[0]: per lane, even-indexed elements of a (low 64 bits) then of b (high 64 bits)
//   dst[1]: per lane, odd-indexed elements of a (low 64 bits) then of b (high 64 bits)
// Both results are written with unaligned stores, so dst needs no particular
// alignment but must have room for two __m256i values.
void foo(__m256i a, __m256i b, __m256i* dst) {
  // Byte-shuffle control. Per 128-bit lane it selects bytes 0,1,4,5,8,9,12,13
  // (the even-indexed 16-bit elements) followed by 2,3,6,7,10,11,14,15 (the
  // odd-indexed ones). The upper lane is written with indices 16..31, but the
  // shuffle only uses the low 4 bits of each index within its own lane, so
  // both lanes apply the same pattern. No index has bit 7 set, so no byte is zeroed.
  __m256i v3 = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, 16, 17, 20, 21, 24, 25, 28, 29, 18, 19, 22, 23, 26, 27, 30, 31);
  
  // The same control vector is applied to both inputs: one shuffle per input,
  // each result feeding both unpacks below.
  __m256i v4 = _mm256_shuffle_epi8(a, v3);
  __m256i v6 = _mm256_shuffle_epi8(b, v3);
  
  // Low 64 bits of each lane of v4 and v6: the even-indexed elements of a, then b.
  __m256i v9 = _mm256_unpacklo_epi64(v4, v6);
  // High 64 bits of each lane of v4 and v6: the odd-indexed elements of a, then b.
  __m256i v10 = _mm256_unpackhi_epi64(v4, v6);
  _mm256_storeu_si256(dst, v9);
  _mm256_storeu_si256(dst+1, v10);
}
```
via `-O3 -march=haswell` compiles to:
```asm
; Compiler output for foo. NOTE(review): ymm0/ymm1 presumably hold a/b and rdi
; holds dst (System V x86-64 calling convention); the stores to [rdi] and
; [rdi + 32] are consistent with dst and dst+1.
; Four vpshufb instructions and two distinct masks are emitted here, while the
; source applies a single mask once to each input.
foo:
; Mask 1: per 128-bit lane, even-indexed 16-bit elements packed into dwords 0
; and 2 ("z" in the listing presumably marks a zeroed byte).
 vmovdqa ymm2, ymmword ptr [rip + .LCPI0_0] ; 0,1,4,5,z,z,z,z,8,9,12,13,z,z,z,z,16,17,20,21,z,z,z,z,24,25,28,29,z,z,z,z
; ymm3 = ymm1 shuffled by mask 1, ymm2 = ymm0 shuffled by mask 1.
 vpshufb ymm3, ymm1, ymm2
        vpshufb ymm2, ymm0, ymm2
; imm 136 = 0b10001000: per lane, take dwords 0 and 2 of ymm2, then dwords 0
; and 2 of ymm3. This yields the value stored to [rdi].
 vshufps ymm2, ymm2, ymm3, 136
; Mask 2: per 128-bit lane, odd-indexed 16-bit elements, duplicated into both
; 64-bit halves.
        vmovdqa ymm3, ymmword ptr [rip + .LCPI0_1] ; 2,3,6,7,10,11,14,15,2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31
; ymm1 and ymm0 are each shuffled in place by mask 2.
 vpshufb ymm1, ymm1, ymm3
        vpshufb ymm0, ymm0, ymm3
; imm 204 = 0b11001100: dwords 2,3,6,7 come from ymm1, dwords 0,1,4,5 stay from
; ymm0, i.e. low 64 bits of each lane from ymm0 and high 64 bits from ymm1.
 vpblendd        ymm0, ymm0, ymm1, 204
; Unaligned 32-byte stores of the two results.
        vmovups ymmword ptr [rdi], ymm2
        vmovdqu ymmword ptr [rdi + 32], ymm0
        vzeroupper
 ret
```

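The generated code does not take advantage of the reusable `vpshufb` results, even though the source shows this is possible: it emits four `vpshufb` instructions with two different masks, whereas two `vpshufb` instructions sharing a single mask, followed by a 64-bit unpack-low and unpack-high, would suffice.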

https://c.godbolt.org/z/79Td33z9c
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJyMVVFvqzgT_TXDi9XIHgOBBx6S5ov0SSvtSnsf9i0y4ATvBcxiQ9X8-pWHpE3S6G6r1NiZc8ae4zNBOWdOvdYFJFtIdpGafGPHoj4r06motPV78aMxjlW21iA3wOmT8uVThQVK01ftFOKvztem96sG5P8eQ6brTO9H039E-Wa2pmZHawGzw6HDJDVMAb6y66K8WQBuWO08YM5gvQW-YR-wWTKQO3bowvLgtB8PejAZYMZDAhGGOAxJGLIw5BRAGmUYaUqzNAxriix04gtKICiDIIggDBIGCYOEQcIgbYPLPjQXNEfaCJc9KQ9SHkl5pADMQS7l3dcY39XYTMdjq69lkmizvOF-0NJf0MovtHtufsud-kFVP1sbyGkMmM1U7Zw-21Xwr9TG_JJ6OaG3o54OzmCSAmZ04a9szr-H3NI1zOHeLnBY724NGzxnFIOUv_wu2UunxqoBuWuUe9NtCylnle0G02rHvL34_cpVrgO-CW6l79nc2bn-R7H3rqM7fe-6NzvWbPAjg2Q7moEBbtnqt9c__s8PHJIdA7ll4Z7DMYMEwSnnh__gleAUMid58xFB9iP3kfnIe48YsiI5kYxIPrzHhAqGYIgynFxeKhCXJ5LUy98N7Foov4XNITy42_j1KZcGS2-zfcom_1s2cZUtZAz4UHsonXqTWpM6kxrzOxjqRWpF6kRqROpDakPqQim-jbsXUTyIKJ-LyB9ElEuastV9XV_xT3DLbwyPH8ScFunvRKwNJLtnV0niT0_wJLrETxq_pZ31aKdh0GP4ctT-oaeAb3rrmVc_TX9iqp5V79VJM3tkvtFs1JNTZauvIgCuHVOOYuHFwlxj3xwzjg3WOVO2erUkbbwfXGg33APuq9XJ1qVt_cqOJ8D9GXC_zn_UUp7zKqoLWecyV5EuxDrJU7HO1zxqCl2WqsrTRKm6EqnQia6rOOeCl_UxLvNjZArkmAjOY8Q4FXyFopRaqUyIDPFYcYi57pRpV207d2HvyDg36UKkIs941KpSt45eoIi9fmMUBQxaRmMRSC_ldHIQ89Y47z7TeONbevP-laXB539OpR286VT7RbJoGtviXo6T8c1UrirbAe5DzsvjZRjt37rygHs6iQPcX446F_hvAAAA___uGS7m">