<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/60632">60632</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[opt] Failure to recognise equivalent shuffled ops
</td>
</tr>
<tr>
<th>Labels</th>
<td>
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
RKSimon
</td>
</tr>
</table>
<pre>
https://gcc.godbolt.org/z/dnsebnPz5
We're seeing cases where multiple uses of a node are preventing vector-combine from merging equivalent shuffles.
Sorry, the test case is still more convoluted than necessary :(
```ll
define <2 x float> @foo(<2 x float> %a0, <2 x float> %a1, <2 x float> %a3, <2 x float> %a4) {
; dot product
%dp0 = fmul <2 x float> %a0, %a1
%dp1 = shufflevector &lt;2 x float&gt; %dp0, &lt;2 x float&gt; poison, &lt;2 x i32&gt; &lt;i32 1, i32 undef&gt;
%dp2 = fadd <2 x float> %dp0, %dp1
%dp3x = extractelement <2 x float> %dp2, i32 0
; scalar fdiv
%a3x = extractelement <2 x float> %a3, i32 0
%x = fdiv float %dp3x, %a3x
; first use
%xsplat0 = insertelement <2 x float> poison, float %x, i32 0
%xsplat1 = shufflevector <2 x float> %xsplat0, <2 x float> poison, <2 x i32> zeroinitializer
%vv = fmul <2 x float> %xsplat1, %a4
; second use
%a4x = extractelement <2 x float> %a4, i32 0
%q = fmul float %x, %a4x
%qsplat0 = insertelement <2 x float> poison, float %q, i32 0
%qsplat1 = shufflevector <2 x float> %qsplat0, <2 x float> poison, <2 x i32> zeroinitializer
%res = fadd <2 x float> %vv, %qsplat1
ret <2 x float> %res
}
```
opt -O3 produces:
```ll
define <2 x float> @foo(<2 x float> %a0, <2 x float> %a1, <2 x float> %a3, <2 x float> %a4) {
%dp0 = fmul <2 x float> %a0, %a1
%dp1 = shufflevector &lt;2 x float&gt; %dp0, &lt;2 x float&gt; poison, &lt;2 x i32&gt; &lt;i32 1, i32 undef&gt;
%dp2 = fadd <2 x float> %dp0, %dp1
%1 = fdiv <2 x float> %dp2, %a3
%xsplat1 = shufflevector <2 x float> %1, <2 x float> poison, <2 x i32> zeroinitializer
%vv = fmul <2 x float> %xsplat1, %a4
%2 = fmul <2 x float> %1, %a4
%qsplat1 = shufflevector <2 x float> %2, <2 x float> poison, <2 x i32> zeroinitializer
%res = fadd <2 x float> %vv, %qsplat1
ret <2 x float> %res
}
```
As the result of the %2 fmul will be splatted, we should be able to reuse %vv instead:
```ll
define <2 x float> @foo(<2 x float> %a0, <2 x float> %a1, <2 x float> %a3, <2 x float> %a4) {
%dp0 = fmul <2 x float> %a0, %a1
%dp1 = shufflevector &lt;2 x float&gt; %dp0, &lt;2 x float&gt; poison, &lt;2 x i32&gt; &lt;i32 1, i32 undef&gt;
%dp2 = fadd <2 x float> %dp0, %dp1
%1 = fdiv <2 x float> %dp2, %a3
%xsplat1 = shufflevector <2 x float> %1, <2 x float> poison, <2 x i32> zeroinitializer
%vv = fmul <2 x float> %xsplat1, %a4
%qsplat1 = shufflevector <2 x float> %vv, <2 x float> poison, <2 x i32> zeroinitializer
%res = fadd <2 x float> %vv, %qsplat1
ret <2 x float> %res
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsV11P4zgU_TXuy1Wr1M5H-9CHgU5f9mFXOw_77MQ3rVeOHWwnlP76lZ0wBAgI2NFIo0FCQOOee47Pyb2yuXPyqBF3JLsi2X7BO38ydvf3H99kY_SiNOJud_K-dYR9IfRA6OFYVaujEaVRfmXskdDDhdCD0A5L_dclI8meJF-G3_8gOESpj1Bxhw5uT2gRmk552SqELjwzNXDQRiBIB63FHrUPiB4rb-yyMk0pNUJtTQMN2mNYw5tO9lyh9uBOXV0rdKsp7zdj7R34E4JH5yN5qO68VAoaYxEqo3ujOo8C_Ilr0Fihc9zeQdznBqblSJ4MP0oNDwTWQRNh1xTOUCvDPWFfgaRJbQyhm2cLNOMJodfPETTj65cW2EsLKaFbIMXVIAaAsCsQxkNrjegq__0xzUSbAGF7qJtOzZYaVEUVU9Q6okZvhyTm4KKd3VVrpDN6siIZjQh2LRmFuOHwT6cF1oR9fURNB8FciNcYo8hHOHaOQDx7yyuPCpvwesyWoPcCkmnKg4-u4opbqIXsJ-X5m6sPqU2KR_yADkWHL99LvnefnZ8rqaV1PvTItI5rFfdDplI7tC9KecjgO-F5Vlms-Ma8R_r3ZH5Ba6SWXnIlL2gnzH3_6qs5Crt3KJ2JCiujxROHePrmpNI5P24eRD02bqg9_eb_yeJmlvs9Wdz8oCwm_Bbdq83X96MRo9ARanHWX4tuZCj2T-bo8NG0HpZ_sl9kxv5Gw3T9MK1enp-Dex8cJLN5_KQREpH0VeAs5F3dSX_MBj_YkR9uSe7iqSkaFM2JR6fbcGwqESKHRxE4bxHcyXRKhAVeKgRvwigeY-FHLnU4Sn0292dz_-zmflenji30C7XqQuyY2LItX-BunRfZNs3yLFmcdsW6FttSJGXOWZ7XSbopasyrdCNqWpQFLuSOJpQlNNmuKS1YssKU5jXbsEysi4JWa5Im2HCpVkr1TbjdLaRzHe7yJGd0oXiJysWbIqWNdA7F0rReNvLCvQx20XCFtLuAXpbd0ZE0UdJ591DPS6_iZdO0nmR7OHCpOhunh8XKHLV0OHO_E2Bat-isenoZlf7UlavKNIQeAsn4Z9la8y9WntBD3IEj9BA38V8AAAD__3QMOTM">