<!--
  Issue metadata: one row per field, with the field name as a row header
  and its value in the adjacent cell. The presentational attributes on
  <table> are kept as-is so the rendered appearance does not change.
-->
<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th scope="row">Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/54511">54511</a>
        </td>
    </tr>

    <tr>
        <th scope="row">Summary</th>
        <td>
            Failure to merge scalar loads/broadcasts
        </td>
    </tr>

    <tr>
        <th scope="row">Labels</th>
        <td>
            backend:X86
        </td>
    </tr>

    <!-- The Assignees value cell is empty in this notification. -->
    <tr>
        <th scope="row">Assignees</th>
        <td>
        </td>
    </tr>

    <tr>
        <th scope="row">Reporter</th>
        <td>
            RKSimon
        </td>
    </tr>
</table>

<pre>
    https://godbolt.org/z/e7f5axvPn

clang -g0 -O3 -march=btver2
```
#include &lt;x86intrin.h&gt;
void splat(const float &amp;src, float *sdst, __m128 *v128dst, __m256 *v256dst) {
    *sdst = src;
    *v128dst = _mm_set1_ps(src);
    *v256dst = _mm256_set1_ps(src);
}

splat:
  vmovss (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
  vmovss %xmm0, (%rsi)
  vbroadcastss (%rdi), %xmm0
  vmovaps %xmm0, (%rdx)
  vbroadcastss (%rdi), %ymm0
  vmovaps %ymm0, (%rcx)
  retq
```
We could merge all of these loads into:
```
splat:
  vbroadcastss (%rdi), %ymm0
  vmovss %xmm0, (%rsi)
  vmovaps %xmm0, (%rdx)
  vmovaps %ymm0, (%rcx)
  retq

```
</pre>
<!-- 1x1 image with empty alt text; width/height are unitless pixel integers. -->
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJyVlEtzmzAQgH-NuGjskSUL4wMHJ24uPbTTXnrz6GVQK5ArCerk13cBO3acZCZhgGVXq28fLEivH8s6pUNEbIPoA5yV19K7NPehAu0JLrPac3Hsv7eIbBHZTHflRFvhWUXw7BvDs0YEVSO2lak3gZ4cc3I6J5Uy2yrXaYMRuz8WuW1TsO0ctn2ZPHpvNY4HJxKihfJtTHjvvEgY0TwGhej9s76JOqbBsNs1C1oMlh7kxUh5PhpBjsY1Rqu7KQyG40SATLZ4QLOXayfWuLxrml00abGDJtFizGP9yn8Kc_YH7f0taLW9buRUL9uceX3j-xgBWiDKg7bDVigJlGPTEJAMTw8QqjEN4ncE8S24PJngb8QtckRMtJEeR_rZSwYvtBIxvR_-migObyH18RPIx7eRjy-R6hoZTPr75nj9M1j5zmloSqgMFs5hv8epNtFgGBodMcybf270ze5Xb-GTmX-gvR9s2afb8LKcTJdMr9laZMkmZ8oHYV0XDE7-1JiohBNhagl83Jc6sy6423-BTXUn58o3oDjXn8XsEPxvo-CzerAxdmYA8SVfLLK6lAWXvNhzqqjRXDJRMGG4XBdS5Iul0JkT0rhYwuAiSqVQf0yrIeKvIgcdRjmzJSWUEkbZghK-ZHMCJLJaUqL2LOeEoCUxDdQ1H3IZflRZKMe0ZFdFWHQWyrksihht1RozhgS-6FLtQ_nj60_b-DYbKyjH9P8Di8tyOQ">