<!--
  Issue metadata: one row per field, with the field name as the row header
  and its value in the adjacent cell. Labels and Assignees are empty here.
  The presentational attributes (border, cellspacing, cellpadding) are kept
  unchanged; presumably this fragment is rendered without a stylesheet, so
  confirm before moving them to CSS.
-->
<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th scope="row">Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/55197">55197</a>
        </td>
    </tr>

    <tr>
        <th scope="row">Summary</th>
        <td>
            SVE: bad lowering of copysign with fixed length vectors
        </td>
    </tr>

    <tr>
        <th scope="row">Labels</th>
        <td>
        </td>
    </tr>

    <tr>
        <th scope="row">Assignees</th>
        <td>
        </td>
    </tr>

    <tr>
        <th scope="row">Reporter</th>
        <td>
            rscottmanley
        </td>
    </tr>
</table>

<pre>
    When targeting SVE using fixed-length vectors, the llvm.copysign intrinsic generates very inefficient code:

```
target triple = "arm64-linux"

define void @copysign_(i64* nocapture writeonly %a, i64* nocapture readonly %b, i64* nocapture readonly %c) {
L.entry:
  %0 = bitcast i64* %b to &lt;8 x float&gt;*
  %1 = load &lt;8 x float&gt;, &lt;8 x float&gt;* %0, align 4
  %2 = bitcast i64* %c to &lt;8 x float&gt;*
  %3 = load &lt;8 x float&gt;, &lt;8 x float&gt;* %2, align 4
  %4 = tail call &lt;8 x float&gt; @llvm.copysign.v8f32(&lt;8 x float&gt; %1, &lt;8 x float&gt; %3)
  %5 = bitcast i64* %a to &lt;8 x float&gt;*
  store &lt;8 x float&gt; %4, &lt;8 x float&gt;* %5, align 4
  ret void
}

declare &lt;8 x float&gt; @llvm.copysign.v8f32(&lt;8 x float&gt;, &lt;8 x float&gt;)


llc copysign-vls.ll -O3 -mcpu=neoverse-v1 -o copysign-vls.s -aarch64-sve-vector-bits-min=256


copysign_:                              // @copysign_
        stp     x29, x30, [sp, #-16]!           // 16-byte Folded Spill
        mov     x29, sp
        sub     x9, sp, #48
        and     sp, x9, #0xffffffffffffffe0
        ptrue   p0.s, vl8
        mvni    v0.4s, #128, lsl #24
        ld1w    { z1.s }, p0/z, [x1]
        ld1w    { z2.s }, p0/z, [x2]
        mov     z3.s, z2.s[7]
        mov     z4.s, z1.s[7]
        mov     z5.s, z2.s[6]
        mov     z6.s, z2.s[5]
        mov     z7.s, z1.s[5]
        mov     z16.s, z1.s[3]
        mov     z17.s, z1.s[2]
        bit     v3.16b, v4.16b, v0.16b
        mov     z4.s, z1.s[6]
        bit     v6.16b, v7.16b, v0.16b
        mov     z7.s, z1.s[4]
        bif     v4.16b, v5.16b, v0.16b
        mov     z5.s, z2.s[4]
        bit     v5.16b, v7.16b, v0.16b
        mov     z7.s, z2.s[3]
        bit     v7.16b, v16.16b, v0.16b
        mov     z16.s, z2.s[2]
        stp     s4, s3, [sp, #24]
        mov     v3.16b, v0.16b
        stp     s5, s6, [sp, #16]
        bit     v16.16b, v17.16b, v0.16b
        stp     s16, s7, [sp, #8]
        bsl     v3.16b, v1.16b, v2.16b
        mov     z2.s, z2.s[1]
        mov     z1.s, z1.s[1]
        bsl     v0.16b, v1.16b, v2.16b
        stp     s3, s0, [sp]
        ld1w    { z0.s }, p0/z, [sp]
        st1w    { z0.s }, p0, [x0]
        mov     sp, x29
        ldp     x29, x30, [sp], #16             // 16-byte Folded Reload
        ret
```

In Godbolt: https://godbolt.org/z/qM1ncWMzM
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJydV12PqzYQ_TXkxSLC5it54OHu5m5VqatKvVLvY2XAJK4coNiwyf76jk0gcYBsdFEUQD5zZuZ4bA9plZ-TnwdWIkWbPVO83KMff39HrdRPBT-xHAlW7tUBdSxTVSMd8orUgSEhuuM6q-qz5PsS8VI1vJQ8Q3tWsoYqJsGgOcMAKwqecVYqlFU5c_xvjrdzvOE_8i4_89oHgYCrFgw5_g45hNDmGAWu4GV7grdb65wVwI-6iufICbwhmn8csuFR4JBvqKwyWqu2Yeij4YpVpTgDZUh1FhNIw2g-INIvEZlDtsiJX_pQ_lhDhs15TA9piGdSSLnKqFQDmyZHqoKh1w06oUJUVDn-dxi6scTGEobyKe51xtR400NU6OkIbqjIQhDZl0H4vxAEmQ8iMFSKcoEyKsS9sZ49q6DW3abwgWszAYI2M95NuDAhNy7Dhbzp47wlFDmbow-Wkw5nkm6gjnVhXuo13tmFmwk65-ZJGWZD2VoLy_wLkaGBy-2EXIP07p8-co9Z3YI8JatglUrmdhi5lQ2VyKW0yQ6w9mQHCLP8XdBTukdegjEJo6nH6xr0QYRHl0Pe4Gev24t2_SVVbe4nyAzyPfmmwJ3wRdbmgfgujpxw5xA8pcWRm54VQ2-VyGEP-1FzIWz6Y9Xd0gOp7b1N--FhtHcZbGwYLfMebgA9GGDeqbAu5tlmtWpapu_e2uyonbjjPXYl1_fOWwfyQoqhDuBJSKHfSGBbiBx_GAHiF_SJYfp0zQG8BtnePi_SnbAWbNmOLNmRid2g36ffp6BtARkvA4MLEH8FDC3GaBkYWcBwGRhbrh8AcWQh_QdIm3MqECwVc-_8NY7MedIF45Nnnp7RaZr-SByNdPFTxHbEwQxx0RNf4wyfIrYnbI74EnH4ixGThdkYia90OHqKGdu1M52-YfuRZuOX_v3mQ6ZpDuQ3Mz4Twkhsjg0Z3RPjBzN-kxx-qODoBBt-Gd972UydwL5yFz0en8gDKYml5HSHuWpu1d8UOEbgPRfBmKSZHHlzPjzc5LyFTW7GTqplu35j9BbTvRwJ16ZkCGX5WAt3Yw2g22v-UPuL6d7MZoe2Y7az7v9_L9FvVZ5WQunj-aBULXXHatj3_cC6ava9Jm__veMy-_n--b7KEz_f-lu6UlwJlsAXgrZPoTEU1Qdr9LdCVYztA_rg8L0w9_GwahuR3LkFbJtCx3OEF938XG5u3VT_ghW8cilbBpXzFoZ4G68OCSmonwdxENF462U4D2m4KbBH05BsvHyTrwRNmZAJiAqSrnhCPEK8gGwJwYSE64BGeRTjNCeeh7dZBD0IO0JvujbNFwiwahITQ9rupe7JuFTyOkilzpKxgZ-26lA1SSOzSqkjLQU7r0zMiQn4fy6GelY">