<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/114959>114959</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Missing optimization in zip_float
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          junaire
      </td>
    </tr>
</table>

<pre>
    Source:
```c
#include &lt;immintrin.h&gt;

/*
 * Reproducer: interleave two pairs of doubles and square each element.
 *
 * Reads src[0], src[1], src[4] and src[5] and writes
 *   dst[0..3] = { src[0]^2, src[4]^2, src[1]^2, src[5]^2 }.
 *
 * Despite the name, the elements are doubles, not floats.
 * dst is written with the aligned 256-bit store intrinsic, so it is
 * expected to be 32-byte aligned.
 */
void zip_float(const double *src, double *dst) {
 /* Both 128-bit halves of s0 hold { src[0], src[1] }. */
 __m256d s0 = _mm256_broadcast_pd((__m128d*)src);
    /* The cast binds tighter than the addition, so +2 advances by two
     * __m128d (32 bytes): both halves of s1 hold { src[4], src[5] }. */
    __m256d s1 = _mm256_broadcast_pd((__m128d*)src + 2);
    /* Immediate 0xc = 0b1100: the low half takes element 0 of s0 and s1,
     * the high half takes element 1 of s0 and s1, giving
     * { src[0], src[4], src[1], src[5] }. */
    __m256d s = _mm256_shuffle_pd(s0, s1, 0xc);
    /* Element-wise square. */
    s = _mm256_mul_pd(s, s);
 /* Aligned store of all four results. */
 _mm256_store_pd(dst, s);
}
```

LLVM:
```
zip_float:
 vmovupd xmm0, xmmword ptr [rdi]
        vmovupd xmm1, xmmword ptr [rdi + 32]
        vunpcklpd       xmm2, xmm0, xmm1
        vunpckhpd xmm0, xmm0, xmm1
        vinsertf128     ymm0, ymm2, xmm0, 1
 vmulpd  ymm0, ymm0, ymm0
        vmovapd ymmword ptr [rsi], ymm0
 vzeroupper
        ret
```

GCC:
```
zip_float:
 vbroadcastf128  ymm0, XMMWORD PTR [rdi]
        vbroadcastf128  ymm1, XMMWORD PTR [rdi+32]
        vshufpd ymm0, ymm0, ymm1, 12
 vmulpd  ymm0, ymm0, ymm0
        vmovapd YMMWORD PTR [rsi], ymm0
 vzeroupper
        ret
```

Godbolt: https://godbolt.org/z/ffz1YEhPE
Tweeted by FFmpeg: https://x.com/FFmpeg/status/1853326818008514900
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysVV2PmzgU_TWXF6uRfQ0DPPAwmZR96WirbrW7fYoAm8RdjJE_Mpn8-hWENF8zK3XVKALd3HsO95yDnMo5temlLCBZQrKKquC3xhbfQ18pK6PaiNfiDxNsI4E_Al0BfYQHevw2c41c9U0XhCTAn5TWqvdW9Yst8I_zxHTdGSXIQQ3rtjOVB8wa0ztPhAl1Jwngo7MN4NPFD8J5wJxAujwykPVaY_IgiKME-Iqs9Viua2sq0VTOrwcBmAFm67VmmAnAR8B8os2Bn0gIOfOwn-MhgEuC77JdkrltaNtOHqkcHYU5Nl7p_m6dK6AO3QyaMFezJ25v7Mw8WXQzBunqJqnLGD59-vP5PstjeU7nNEB22uzCIMhe60nEXusXYwUZvCWQLK1QkKzOUsbPBYS9DZl85HiPDP3Q_NMNYq73WuPMcHo4ewuxvV7wvVnVO2l9yzCb6td58vXmKeyH9DCtcjF3vt8prgYxdq6Uusmca8juIK0JwyDtNYeV_j9S--3p6WdC-_EqH8WeNv_7-fmv37-syOevX94N7x7K3oHi8q0Ixxf_aMWtaRMPw__t7rebFX6hu0bUphsNJFvvBzc6iSVguTk2FsZuAMsDYNm2B_bt4_bzfLR9fZHSS0HqV1KWepCbe479ojEasJz7WDpf-eAAS5YlnONDxjJKs4TFOaWRKLjIeV5FsmApp2mes5xH2yJOZUo5zeOWNjKLUeRCpqLmbfPQJDHjkSqQYswYTWjOOeML3jZt1eQc2zRFWsUQU6kr1S26bqdHRZFyLsiCsThP8qiratm56X8AsZcvZOoCjhFHthhBH-qwcRDTTjnvzjRe-U4Wz8o51W-IGbzS6lB5ZXqi-vOJHwXbFTf2Kr8N9ezPyDffPgzWfJeNByynLSazjmvuCvw3AAD__y6D51M">