<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=http://email.email.llvm.org/c/eJzNWN9vqzYU_mvIiwUCQ0jykIe2aaVJnSZt0t3jZIxJvGtwZhua9q_fsSENaSDJzXKlUYQhnB_fOfY5_kom8_fln9xskNkwVEgh5Buv1uj19duv6JffPfzkXjw8KLpJE5QR-p1VOeIabZXMa2plSYXYziiCtJGKIV5pw0iOZIGoLDNeWRleGYlAggkPPzq5AHnhygsfBq5p2J3uUQiK_NIovhXMi1eEtFD8km5reK6YbJjSzG8i5P8WI78T8HUDvzEKvvyMG-2XvAJxPE1RydSaBUL0neas4BVDjeQ58pLQiXh4ztPEww_Iw1Nic9E9VpKSrakhWAWRykq8W4nsogT18AJ5s8fW5WvAKqPevbiDgKxIiAAkAsCUaIMO7imCBHrx0xztUCEkMV78DK96mpHThFf5qdzTgKrz1tPHTr-g5RbVkIkvCs6DNfTBlIQ5NZwIDvc9A_EI9Owi9OQG6HFPf9pCz3kzBDtxRiz8g0Y6gpWcx2oIF4gSIT4XihBNGZREf2d50K7rZl7EONiGboQlNIBoOh5W6hZRjFHSk-FRpwj2Fr0oZi6KnVRDgk6bR8iomjmjd7sFsPfNR3JrPma9fChmHIquqmer4_Kmgih2O8xhhHtwvCyJWn9F-AnuS0drr20DZbrXdcFtJQ1zLVfzHHqp1jVDb_v-bAEj6GVMFIho9MZs4jlBpWw8_MKkgivUb8Ws5_hcU237Wwxpu3SASTgPPbHLdnts7XqwYxhoG30j5scCgMyNH7gV8HDsRxc8xatwV3w5jq2KPHpzGrNH9AGukZ1tsL4NwcCH8zN93GFvujrWg-bG_rFwoxbNQf4j_MQXBuFIDHut6NPLYDTXxwEztrecOxT7EQf5saSbVYscnyCPDsjPZCkay1J0miXooxbXPidOvuxnyfk80tFmZEbw3k944udIp48vGteBIj9TUg9CS2QrqOUtxKBa27rS0KJIJqCw3rdQcMSKbaCAXJ2RklkyA69LW4BmI-v1BmXMGKbaiqMyZ4davgNHcZXk71FdwUT-2sv-ZEridhV6iYM0Fg2DLpdcSUZGFJ7OGWuRDPMTKoE4EWr2RGXQxh0YyxWBJncJdJzMjIZ2C6u5FM8123m1a5J2p-zu2r1yFOf0ivBPNvkj4fPs53hBSOjvty6InxH9YEC3RX9HrvMjUVwIYJABnQZwngrZ3h-v3Pl8ibQcWuEZ9vKFtBx0xtnLXQjGzVvuTcTklo0adsc2dhxkx57Ad_Z_2dQn-TLOF_GCTEgNG7JaKk2lMSWpBHuf1EosN8ZsteW4bqbXwJDrLKASon-x670bfNjY_2bUwKOj0pCUl2k8n8eTzTJLpgVOF7N5kS_mjIK3NItjlmZREtJ5HE1gxTChl4DZw7hiby0bh3sIYPLfEfAlDjGcUQp_UQizO5_OU1oUYZGkYbKgsHpZCT0pcAUs1Xqils5kVq-1rWuujT68JFrzdcWYAwwIDTeCLbsvN7ZY_vj2bIdKtvyj-zrT_XvR9ofPTzhAZSrNqZ440EuH-F_oqf4U>53883</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            AArch64: SVE: no merge combine with masked store intrinsics
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          rscottmanley
      </td>
    </tr>
</table>

<pre>
    With the following LLVM IR, the AArch64 backend produces an extra store instead of combining the two masked stores into a single sel + store.



```
llc -mtriple=aarch64 -mcpu=neoverse-v1 -O3 -aarch64-sve-vector-bits-min=256 merge.ll

define void @merge(i64* %a, i64* nocapture readonly %b, i64* nocapture readonly %c) {
L.entry:
  %0 = bitcast i64* %c to <8 x float>*
  %1 = load <8 x float>, <8 x float>* %0
  %2 = fcmp une <8 x float> %1, zeroinitializer
  %3 = bitcast i64* %b to <8 x float>*
  %4 = load <8 x float>, <8 x float>* %3
  %5 = fdiv <8 x float> %4, %1
  %6 = bitcast i64* %a to <8 x float>*
  tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %5, <8 x float>* %6, i32 4, <8 x i1> %2)
  %7 = xor <8 x i1> %2, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %4, <8 x float>* %6, i32 4, <8 x i1> %7)
  ret void
}

declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>)
```

produces the following (note also the side issue with the mask itself, which is generated via mov/eor/cmpne):

```
merge:                                  // @merge
        ptrue   p0.s, vl8
        mov     z2.s, #-1                       // =0xffffffffffffffff
        ld1w    { z0.s }, p0/z, [x2]
        fcmeq   p1.s, p0/z, z0.s, #0.0
        mov     z1.s, p1/z, #-1                 // =0xffffffffffffffff
        eor     z1.d, z1.d, z2.d
        cmpne   p2.s, p0/z, z1.s, #0
        ld1w    { z1.s }, p0/z, [x1]
        fdivr   z0.s, p0/m, z0.s, z1.s
        st1w    { z0.s }, p2, [x0]
        st1w    { z1.s }, p1, [x0]
        ret
```

Also note that using scalable types has the same problem (though with better mask code):

```
llc -mtriple=aarch64 -mcpu=neoverse-v1 -O3 merge-scalable.ll

define void @merge_scalable(i64* %a, i64* nocapture readonly %b, i64* nocapture readonly %c) {
  %c0 = bitcast i64* %c to <vscale x 4 x float>*
  %1 = load <vscale x 4 x float>, <vscale x 4 x float>* %c0
  %2 = fcmp contract une <vscale x 4 x float> %1, zeroinitializer
  %3 = bitcast i64* %b to <vscale x 4 x float>*
  %4 = load <vscale x 4 x float>, <vscale x 4 x float>* %3
  %5 = fdiv <vscale x 4 x float> %4, %1
  %6 = bitcast i64* %a to <vscale x 4 x float>*
  tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> %5, <vscale x 4 x float>* %6, i32 4, <vscale x 4 x i1> %2)
  %7 = fcmp contract oeq <vscale x 4 x float> %1, zeroinitializer
  tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> %4, <vscale x 4 x float>* %6, i32 4, <vscale x 4 x i1> %7)
  ret void
}

declare void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32 immarg, <vscale x 4 x i1>)
```

Resulting assembly (.s):

```
merge_scalable:                         // @merge_scalable
        ptrue   p0.s
        ld1w    { z0.s }, p0/z, [x2]
        ld1w    { z1.s }, p0/z, [x1]
        fcmeq   p1.s, p0/z, z0.s, #0.0
        fdivr   z0.s, p0/m, z0.s, z1.s
        not     p2.b, p0/z, p1.b
        st1w    { z0.s }, p2, [x0]
        st1w    { z1.s }, p1, [x0]
        ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzNWN-PqjgU_mvwpYFAQdQHH2bGuckms9lkN7n7uCnloN1bqEuL48xfv6cFFUdR13iTRUJBzo_vnPacfiFT-cf8T2FWxKyAFEpK9S6qJXl7-_4r-eV3j764F09PNV-lCckY_wFVToQm61rlDbeyrCKwNTUj2qgaiKi0AZYTVRCuykxUVkZURhGUAOnRZycXEC9ceOHTmWsadqd7lJITvzS1WEvw4gVjLRS_5OsGnytQG6g1-JuI-L_FxO8EfL3B_4CjLz8TRvulqFCcjlNSQr2EQMq-0xwKUQHZKJETLwmdiEenIk08-kQ8OmY2F91jpThbmwaDrTFSVckPK5FdleAenRFv8ty6fAugMvWHF3cQiBUJCYIkCJgzbcjBPSeYQC9-mZItKaRixotf8VVPM3Ka-Co_lXs5o-q89fSp0y94uSYNZuKLgvNgDX1CrXBOjWBS4H3PQDwAPbsKPbkDetzTH7fQc7E5BztxRiz8g0Y6gJVdxmqYkIQzKfcLRcpNGZRM_4A8aNf1ZlrENFiHbsQldAbReDis1C2imJKkJyOiThHtzXpRTFwUW1WfE3TaIiKmbsAZfdgtgn1sPpJ78zHp5aMG41B0VT1ZHJc3l6yG-2GeR7gDJ8qS1cuvCPfgvnS09to2UNC9rotuK2XAtVwtcuylWjdA3nf92QIm2MtAFoRp8g428YKRUm08-g1UjVes3wqs5_hSU237W4xpu3agSTwPPbHLdnus7XqwYxhoG_1GTo8FEJkbP2kr4NHYj654ihfhtvhyHFuVefTuNCbP5BNdEzvbaH0dooFP52f8vKXeeHGsh80N_rFwoxbNQf4z3OMLg3Aghp1WtPdyNprb48AZ21nOHYrdSIP8WNLNqkVOT5BHB-QXshQNZSk6zRL2UYtrlxMnX_az5Hwe6WgzMCN05yc88XOk08cXDetgkV8oqSepFbEV1PIWZkijbV1pbFEsk1hYH2ssOGbFVlhArs5YCZbM4OvSFqBZqWa5IhkYA3VbcVzlcKjlB3AUV0n-DtUNTOSvnexPpiRuV-HXOMjGogHscsmNZGRA4eWSsRbJeX7CFRInxs2OqJy18QDGckOgyUMCHSYzg6Hdw2quxXPLdl5tN0m7U3Z37V45iHN8Q_gnm_yR8GX2c7wgFPb3exfEz4j-bED3Rf9ArvNforgSwFkGdBrAZSpke3-8cOfrNdJyaIUX2MsX0nLQGWYvDyEYd2-5dxGTezZq3B3b2GmQHXtC39n_ZVMf5fM4n8UzNjLCSJh33x_slP_x_dUOlWp30e4bQ0eS21W-_xCBG3KlBdejppbzlTFrbVmxWxtLFG-yALXxwVZIN_hIBf4GbvDRkW9M47dxPJ3Go9W8SCaTjM94TmmS5YyN4yLPCkinLC4AIB3hGgOp5xilR2kF7y1_x3sMeSTmNKR4Rin-ohBnczqeprwowiJJw2TGcbVCiT0ocAWr6uWonjtIWbPUto6FNvrwkmktlhWAc4f2WYPUpZ7XmitjSlZJ-Bg5_3OH_1_Sbuo7">