<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/55197">55197</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
SVE: bad lowering of copysign with fixed length vectors
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
rscottmanley
</td>
</tr>
</table>
<pre>
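When targeting SVE with fixed-length vectors, the llvm.copysign intrinsic is lowered to very inefficient code: as the output below shows, the operation is carried out one lane at a time (lane extracts, NEON bit/bif/bsl, and a round trip through the stack) rather than on the whole vector: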
```
target triple = "arm64-linux"
define void @copysign_(i64* nocapture writeonly %a, i64* nocapture readonly %b, i64* nocapture readonly %c) {
L.entry:
%0 = bitcast i64* %b to <8 x float>*
%1 = load <8 x float>, <8 x float>* %0, align 4
%2 = bitcast i64* %c to <8 x float>*
%3 = load <8 x float>, <8 x float>* %2, align 4
%4 = tail call <8 x float> @llvm.copysign.v8f32(<8 x float> %1, <8 x float> %3)
%5 = bitcast i64* %a to <8 x float>*
store <8 x float> %4, <8 x float>* %5, align 4
ret void
}
declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
llc copysign-vls.ll -O3 -mcpu=neoverse-v1 -o copysign-vls.s -aarch64-sve-vector-bits-min=256
copysign_: // @copysign_
stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
mov x29, sp
sub x9, sp, #48
and sp, x9, #0xffffffffffffffe0
ptrue p0.s, vl8
mvni v0.4s, #128, lsl #24
ld1w { z1.s }, p0/z, [x1]
ld1w { z2.s }, p0/z, [x2]
mov z3.s, z2.s[7]
mov z4.s, z1.s[7]
mov z5.s, z2.s[6]
mov z6.s, z2.s[5]
mov z7.s, z1.s[5]
mov z16.s, z1.s[3]
mov z17.s, z1.s[2]
bit v3.16b, v4.16b, v0.16b
mov z4.s, z1.s[6]
bit v6.16b, v7.16b, v0.16b
mov z7.s, z1.s[4]
bif v4.16b, v5.16b, v0.16b
mov z5.s, z2.s[4]
bit v5.16b, v7.16b, v0.16b
mov z7.s, z2.s[3]
bit v7.16b, v16.16b, v0.16b
mov z16.s, z2.s[2]
stp s4, s3, [sp, #24]
mov v3.16b, v0.16b
stp s5, s6, [sp, #16]
bit v16.16b, v17.16b, v0.16b
stp s16, s7, [sp, #8]
bsl v3.16b, v1.16b, v2.16b
mov z2.s, z2.s[1]
mov z1.s, z1.s[1]
bsl v0.16b, v1.16b, v2.16b
stp s3, s0, [sp]
ld1w { z0.s }, p0/z, [sp]
st1w { z0.s }, p0, [x0]
mov sp, x29
ldp x29, x30, [sp], #16 // 16-byte Folded Reload
ret
```
In Godbolt: https://godbolt.org/z/qM1ncWMzM
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJydV12PqzYQ_TXkxSLC5it54OHu5m5VqatKvVLvY2XAJK4coNiwyf76jk0gcYBsdFEUQD5zZuZ4bA9plZ-TnwdWIkWbPVO83KMff39HrdRPBT-xHAlW7tUBdSxTVSMd8orUgSEhuuM6q-qz5PsS8VI1vJQ8Q3tWsoYqJsGgOcMAKwqecVYqlFU5c_xvjrdzvOE_8i4_89oHgYCrFgw5_g45hNDmGAWu4GV7grdb65wVwI-6iufICbwhmn8csuFR4JBvqKwyWqu2Yeij4YpVpTgDZUh1FhNIw2g-INIvEZlDtsiJX_pQ_lhDhs15TA9piGdSSLnKqFQDmyZHqoKh1w06oUJUVDn-dxi6scTGEobyKe51xtR400NU6OkIbqjIQhDZl0H4vxAEmQ8iMFSKcoEyKsS9sZ49q6DW3abwgWszAYI2M95NuDAhNy7Dhbzp47wlFDmbow-Wkw5nkm6gjnVhXuo13tmFmwk65-ZJGWZD2VoLy_wLkaGBy-2EXIP07p8-co9Z3YI8JatglUrmdhi5lQ2VyKW0yQ6w9mQHCLP8XdBTukdegjEJo6nH6xr0QYRHl0Pe4Gev24t2_SVVbe4nyAzyPfmmwJ3wRdbmgfgujpxw5xA8pcWRm54VQ2-VyGEP-1FzIWz6Y9Xd0gOp7b1N--FhtHcZbGwYLfMebgA9GGDeqbAu5tlmtWpapu_e2uyonbjjPXYl1_fOWwfyQoqhDuBJSKHfSGBbiBx_GAHiF_SJYfp0zQG8BtnePi_SnbAWbNmOLNmRid2g36ffp6BtARkvA4MLEH8FDC3GaBkYWcBwGRhbrh8AcWQh_QdIm3MqECwVc-_8NY7MedIF45Nnnp7RaZr-SByNdPFTxHbEwQxx0RNf4wyfIrYnbI74EnH4ixGThdkYia90OHqKGdu1M52-YfuRZuOX_v3mQ6ZpDuQ3Mz4Twkhsjg0Z3RPjBzN-kxx-qODoBBt-Gd972UydwL5yFz0en8gDKYml5HSHuWpu1d8UOEbgPRfBmKSZHHlzPjzc5LyFTW7GTqplu35j9BbTvRwJ16ZkCGX5WAt3Yw2g22v-UPuL6d7MZoe2Y7az7v9_L9FvVZ5WQunj-aBULXXHatj3_cC6ava9Jm__veMy-_n--b7KEz_f-lu6UlwJlsAXgrZPoTEU1Qdr9LdCVYztA_rg8L0w9_GwahuR3LkFbJtCx3OEF938XG5u3VT_ghW8cilbBpXzFoZ4G68OCSmonwdxENF462U4D2m4KbBH05BsvHyTrwRNmZAJiAqSrnhCPEK8gGwJwYSE64BGeRTjNCeeh7dZBD0IO0JvujbNFwiwahITQ9rupe7JuFTyOkilzpKxgZ-26lA1SSOzSqkjLQU7r0zMiQn4fy6GelY">