<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=http://email.email.llvm.org/c/eJzNWN9vqzYU_mvIiwUCQ0jykIe2aaVJnSZt0t3jZIxJvGtwZhua9q_fsSENaSDJzXKlUYQhnB_fOfY5_kom8_fln9xskNkwVEgh5Buv1uj19duv6JffPfzkXjw8KLpJE5QR-p1VOeIabZXMa2plSYXYziiCtJGKIV5pw0iOZIGoLDNeWRleGYlAggkPPzq5AHnhygsfBq5p2J3uUQiK_NIovhXMi1eEtFD8km5reK6YbJjSzG8i5P8WI78T8HUDvzEKvvyMG-2XvAJxPE1RydSaBUL0neas4BVDjeQ58pLQiXh4ztPEww_Iw1Nic9E9VpKSrakhWAWRykq8W4nsogT18AJ5s8fW5WvAKqPevbiDgKxIiAAkAsCUaIMO7imCBHrx0xztUCEkMV78DK96mpHThFf5qdzTgKrz1tPHTr-g5RbVkIkvCs6DNfTBlIQ5NZwIDvc9A_EI9Owi9OQG6HFPf9pCz3kzBDtxRiz8g0Y6gpWcx2oIF4gSIT4XihBNGZREf2d50K7rZl7EONiGboQlNIBoOh5W6hZRjFHSk-FRpwj2Fr0oZi6KnVRDgk6bR8iomjmjd7sFsPfNR3JrPma9fChmHIquqmer4_Kmgih2O8xhhHtwvCyJWn9F-AnuS0drr20DZbrXdcFtJQ1zLVfzHHqp1jVDb_v-bAEj6GVMFIho9MZs4jlBpWw8_MKkgivUb8Ws5_hcU237Wwxpu3SASTgPPbHLdnts7XqwYxhoG30j5scCgMyNH7gV8HDsRxc8xatwV3w5jq2KPHpzGrNH9AGukZ1tsL4NwcCH8zN93GFvujrWg-bG_rFwoxbNQf4j_MQXBuFIDHut6NPLYDTXxwEztrecOxT7EQf5saSbVYscnyCPDsjPZCkay1J0miXooxbXPidOvuxnyfk80tFmZEbw3k944udIp48vGteBIj9TUg9CS2QrqOUtxKBa27rS0KJIJqCw3rdQcMSKbaCAXJ2RklkyA69LW4BmI-v1BmXMGKbaiqMyZ4davgNHcZXk71FdwUT-2sv-ZEridhV6iYM0Fg2DLpdcSUZGFJ7OGWuRDPMTKoE4EWr2RGXQxh0YyxWBJncJdJzMjIZ2C6u5FM8123m1a5J2p-zu2r1yFOf0ivBPNvkj4fPs53hBSOjvty6InxH9YEC3RX9HrvMjUVwIYJABnQZwngrZ3h-v3Pl8ibQcWuEZ9vKFtBx0xtnLXQjGzVvuTcTklo0adsc2dhxkx57Ad_Z_2dQn-TLOF_GCTEgNG7JaKk2lMSWpBHuf1EosN8ZsteW4bqbXwJDrLKASon-x670bfNjY_2bUwKOj0pCUl2k8n8eTzTJLpgVOF7N5kS_mjIK3NItjlmZREtJ5HE1gxTChl4DZw7hiby0bh3sIYPLfEfAlDjGcUQp_UQizO5_OU1oUYZGkYbKgsHpZCT0pcAUs1Xqils5kVq-1rWuujT68JFrzdcWYAwwIDTeCLbsvN7ZY_vj2bIdKtvyj-zrT_XvR9ofPTzhAZSrNqZ440EuH-F_oqf4U>53883</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
AArch64: SVE: no merge combine with masked store intrinsics
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
rscottmanley
</td>
</tr>
</table>
<pre>
With the following LLVM IR, the AArch64 backend is producing an extra store instead of combining into a sel+store.
```
llc -mtriple=aarch64 -mcpu=neoverse-v1 -O3 -aarch64-sve-vector-bits-min=256 merge.ll
define void @merge(i64* %a, i64* nocapture readonly %b, i64* nocapture readonly %c) {
L.entry:
%0 = bitcast i64* %c to <8 x float>*
%1 = load <8 x float>, <8 x float>* %0
%2 = fcmp une <8 x float> %1, zeroinitializer
%3 = bitcast i64* %b to <8 x float>*
%4 = load <8 x float>, <8 x float>* %3
%5 = fdiv <8 x float> %4, %1
%6 = bitcast i64* %a to <8 x float>*
tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %5, <8 x float>* %6, i32 4, <8 x i1> %2)
  %7 = xor &lt;8 x i1&gt; %2, &lt;i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true&gt;
tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %4, <8 x float>* %6, i32 4, <8 x i1> %7)
ret void
}
declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>)
```
produces the following (note the side issue with how the inverted mask is generated, via mov/eor/cmpne):
```
merge: // @merge
ptrue p0.s, vl8
mov z2.s, #-1 // =0xffffffffffffffff
ld1w { z0.s }, p0/z, [x2]
fcmeq p1.s, p0/z, z0.s, #0.0
mov z1.s, p1/z, #-1 // =0xffffffffffffffff
eor z1.d, z1.d, z2.d
cmpne p2.s, p0/z, z1.s, #0
ld1w { z1.s }, p0/z, [x1]
fdivr z0.s, p0/m, z0.s, z1.s
st1w { z0.s }, p2, [x0]
st1w { z1.s }, p1, [x0]
ret
```
Also note that using scalable types has the same problem (though with better mask code):
```
llc -mtriple=aarch64 -mcpu=neoverse-v1 -O3 merge-scalable.ll
define void @merge_scalable(i64* %a, i64* nocapture readonly %b, i64* nocapture readonly %c) {
  %c0 = bitcast i64* %c to &lt;vscale x 4 x float&gt;*
  %1 = load &lt;vscale x 4 x float&gt;, &lt;vscale x 4 x float&gt;* %c0
  %2 = fcmp contract une &lt;vscale x 4 x float&gt; %1, zeroinitializer
  %3 = bitcast i64* %b to &lt;vscale x 4 x float&gt;*
  %4 = load &lt;vscale x 4 x float&gt;, &lt;vscale x 4 x float&gt;* %3
  %5 = fdiv &lt;vscale x 4 x float&gt; %4, %1
  %6 = bitcast i64* %a to &lt;vscale x 4 x float&gt;*
  tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(&lt;vscale x 4 x float&gt; %5, &lt;vscale x 4 x float&gt;* %6, i32 4, &lt;vscale x 4 x i1&gt; %2)
  %7 = fcmp contract oeq &lt;vscale x 4 x float&gt; %1, zeroinitializer
  tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(&lt;vscale x 4 x float&gt; %4, &lt;vscale x 4 x float&gt;* %6, i32 4, &lt;vscale x 4 x i1&gt; %7)
  ret void
}
declare void @llvm.masked.store.nxv4f32.p0nxv4f32(&lt;vscale x 4 x float&gt;, &lt;vscale x 4 x float&gt;*, i32 immarg, &lt;vscale x 4 x i1&gt;)
```
The generated assembly (.s) is:
```
merge_scalable: // @merge_scalable
ptrue p0.s
ld1w { z0.s }, p0/z, [x2]
ld1w { z1.s }, p0/z, [x1]
fcmeq p1.s, p0/z, z0.s, #0.0
fdivr z0.s, p0/m, z0.s, z1.s
not p2.b, p0/z, p1.b
st1w { z0.s }, p2, [x0]
st1w { z1.s }, p1, [x0]
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzNWN-PqjgU_mvwpYFAQdQHH2bGuckms9lkN7n7uCnloN1bqEuL48xfv6cFFUdR13iTRUJBzo_vnPacfiFT-cf8T2FWxKyAFEpK9S6qJXl7-_4r-eV3j764F09PNV-lCckY_wFVToQm61rlDbeyrCKwNTUj2qgaiKi0AZYTVRCuykxUVkZURhGUAOnRZycXEC9ceOHTmWsadqd7lJITvzS1WEvw4gVjLRS_5OsGnytQG6g1-JuI-L_FxO8EfL3B_4CjLz8TRvulqFCcjlNSQr2EQMq-0xwKUQHZKJETLwmdiEenIk08-kQ8OmY2F91jpThbmwaDrTFSVckPK5FdleAenRFv8ty6fAugMvWHF3cQiBUJCYIkCJgzbcjBPSeYQC9-mZItKaRixotf8VVPM3Ka-Co_lXs5o-q89fSp0y94uSYNZuKLgvNgDX1CrXBOjWBS4H3PQDwAPbsKPbkDetzTH7fQc7E5BztxRiz8g0Y6gJVdxmqYkIQzKfcLRcpNGZRM_4A8aNf1ZlrENFiHbsQldAbReDis1C2imJKkJyOiThHtzXpRTFwUW1WfE3TaIiKmbsAZfdgtgn1sPpJ78zHp5aMG41B0VT1ZHJc3l6yG-2GeR7gDJ8qS1cuvCPfgvnS09to2UNC9rotuK2XAtVwtcuylWjdA3nf92QIm2MtAFoRp8g428YKRUm08-g1UjVes3wqs5_hSU237W4xpu3agSTwPPbHLdnus7XqwYxhoG_1GTo8FEJkbP2kr4NHYj654ihfhtvhyHFuVefTuNCbP5BNdEzvbaH0dooFP52f8vKXeeHGsh80N_rFwoxbNQf4z3OMLg3Aghp1WtPdyNprb48AZ21nOHYrdSIP8WNLNqkVOT5BHB-QXshQNZSk6zRL2UYtrlxMnX_az5Hwe6WgzMCN05yc88XOk08cXDetgkV8oqSepFbEV1PIWZkijbV1pbFEsk1hYH2ssOGbFVlhArs5YCZbM4OvSFqBZqWa5IhkYA3VbcVzlcKjlB3AUV0n-DtUNTOSvnexPpiRuV-HXOMjGogHscsmNZGRA4eWSsRbJeX7CFRInxs2OqJy18QDGckOgyUMCHSYzg6Hdw2quxXPLdl5tN0m7U3Z37V45iHN8Q_gnm_yR8GX2c7wgFPb3exfEz4j-bED3Rf9ArvNforgSwFkGdBrAZSpke3-8cOfrNdJyaIUX2MsX0nLQGWYvDyEYd2-5dxGTezZq3B3b2GmQHXtC39n_ZVMf5fM4n8UzNjLCSJh33x_slP_x_dUOlWp30e4bQ0eS21W-_xCBG3KlBdejppbzlTFrbVmxWxtLFG-yALXxwVZIN_hIBf4GbvDRkW9M47dxPJ3Go9W8SCaTjM94TmmS5YyN4yLPCkinLC4AIB3hGgOp5xilR2kF7y1_x3sMeSTmNKR4Rin-ohBnczqeprwowiJJw2TGcbVCiT0ocAWr6uWonjtIWbPUto6FNvrwkmktlhWAc4f2WYPUpZ7XmitjSlZJ-Bg5_3OH_1_Sbuo7">