<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/67230">67230</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AArch64] Large number of unexpected spills
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
vfdff
</td>
</tr>
</table>
<pre>
* test: **clang -march=armv8.2-a+sve -O3 -S test.c -ffast-math -msve-vector-bits=256**
```
#include <arm_sve.h>
typedef svfloat64_t fvec64 __attribute__((arm_sve_vector_bits(256)));
typedef fvec64 __m256d;
void foo (__m256d *val);
/*
 * Reproducer: multiplies the vectors `a` and `b` (fixed-length 256-bit
 * svfloat64_t, per the typedefs above) and passes the address of the
 * product to the external function foo() once per loop iteration.
 *
 * a, b - vector operands; neither is modified inside the loop.
 * n    - upper bound of the loop counter.
 *
 * Returns the value held in `res` after the last call to foo(). If n is
 * not positive, the loop body never runs and `res` is returned without
 * ever having been assigned.
 */
__m256d _mm256_mul_pd(__m256d a, __m256d b, int n)
{
__m256d res;
for (int i=0; i<n; i++) {
/* Same operands on every iteration, so the product is loop-invariant. */
res = svmul_f64_z(svptrue_b64(), a, b);
/* foo() is only declared in this snippet; it receives a non-const pointer
   to res, so it may read or overwrite the value. */
foo (&res);
}
return res;
}
```
* **clang's output**: The Z registers and P registers are callee-saved registers, so shouldn't it be unnecessary to store them before the call and restore them after the call?
```
// %bb.1: // %for.body.lr.ph
stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
.cfi_def_cfa_offset 48
str x28, [sp, #16] // 8-byte Folded Spill
stp x20, x19, [sp, #32] // 16-byte Folded Spill
mov x29, sp
.cfi_def_cfa w29, 48
.cfi_offset w19, -8
.cfi_offset w20, -16
.cfi_offset w28, -32
.cfi_offset w30, -40
.cfi_offset w29, -48
addvl sp, sp, #-18
str p15, [sp, #4, mul vl] // 2-byte Folded Spill
str p14, [sp, #5, mul vl] // 2-byte Folded Spill
str p13, [sp, #6, mul vl] // 2-byte Folded Spill
str p12, [sp, #7, mul vl] // 2-byte Folded Spill
str p11, [sp, #8, mul vl] // 2-byte Folded Spill
str p10, [sp, #9, mul vl] // 2-byte Folded Spill
str p9, [sp, #10, mul vl] // 2-byte Folded Spill
str p8, [sp, #11, mul vl] // 2-byte Folded Spill
str p7, [sp, #12, mul vl] // 2-byte Folded Spill
str p6, [sp, #13, mul vl] // 2-byte Folded Spill
str p5, [sp, #14, mul vl] // 2-byte Folded Spill
str p4, [sp, #15, mul vl] // 2-byte Folded Spill
str z23, [sp, #2, mul vl] // 16-byte Folded Spill
str z22, [sp, #3, mul vl] // 16-byte Folded Spill
str z21, [sp, #4, mul vl] // 16-byte Folded Spill
str z20, [sp, #5, mul vl] // 16-byte Folded Spill
str z19, [sp, #6, mul vl] // 16-byte Folded Spill
str z18, [sp, #7, mul vl] // 16-byte Folded Spill
str z17, [sp, #8, mul vl] // 16-byte Folded Spill
str z16, [sp, #9, mul vl] // 16-byte Folded Spill
str z15, [sp, #10, mul vl] // 16-byte Folded Spill
str z14, [sp, #11, mul vl] // 16-byte Folded Spill
str z13, [sp, #12, mul vl] // 16-byte Folded Spill
str z12, [sp, #13, mul vl] // 16-byte Folded Spill
str z11, [sp, #14, mul vl] // 16-byte Folded Spill
str z10, [sp, #15, mul vl] // 16-byte Folded Spill
str z9, [sp, #16, mul vl] // 16-byte Folded Spill
str z8, [sp, #17, mul vl] // 16-byte Folded Spill
.cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
.cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
.cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
.cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
.cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
.cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
.cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
.cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
addvl sp, sp, #-1
sub sp, sp, #32
fmul z0.d, z0.d, z1.d
mov w19, w0
mov x20, sp
str z0, [x29, #-19, mul vl] // 16-byte Folded Spill
.LBB0_2: // %for.body
// =>This Inner Loop Header: Depth=1
ldr z0, [x29, #-19, mul vl] // 16-byte Folded Reload
mov x0, sp
st1d { z0.d }, p0, [x20]
bl foo
ptrue p0.d
...
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysWEuPqzYY_TXO5hMIbENgkUUyudNWulKr3qsuukEGmwkVL9mGydxfXxnIY-LJJMxDGQx89vHxMecDmylVPNVCrFCwQcF2wTq9a-Sqz3meL9KGv6wQXoMWSiOyBkB4jfA6K1n9BOBUTGY7RLZMVn3kYochvFG9AOdPAuD8GJq5GTh5zpR2KqZ34FSqF04vMt1IJy20QmSLg3DERd4WeWsUetNvvMSkqLOy4wIQeWCySlQv3B0i36b4cNQvreAiB9XnZcN0SBMNeS-ykEKSMK1lkXZaJAnCEcLRhJKMPJKBB44GHvH0I5vXwEewCgchP4bHY98UHPKmAYSjqYaRqmflOdR4PMSTypRJ1ZVJy8_aMYQfDt1Aai6KWkNtgEaU5QQHx1pSqItOIG-kIWOaFohsPUQ25uShHk_wZvjFcIZmYACRLajekMpDmvxCOFJ9q2UnkjSkg3ixoTSQTM8HB3AQAOHQEDqPoeV2OpNCd7J-zfgQvJz4afrX508dwksFTafbTk_PDFnDz52Af0GKp0JpIRWwmsNf59dSQMbKUghQrBfHiBmEakDvxAvwpkZ4qaEWgoPSjRSQitwUeje2HmClGGMs10KOtxF5vPLgPiL8CAgHaer6g33e_TvVzxvpGu-5pXTb3UGNWOkWefF-nIE98UyBgo1qhxNMHBqhYIuwb4P6oZO-aAGPTckFhx9tUZZHXDfLi4SLPMlyljR5roQGGp11K4duo8v-_BAF20MP0TsdHIgPjPd-fAlEsAF6Q4t3aVdNf5JDtW8OB57H8NlwhvA0yueRi3MlOhJ2_PBKeFDEIfjt8DhBDvWutB67PmPGOO9LI1c7jWiaVv9yLlo_uJSQmrLqSujL11pOSuJ3p2cEpZegwReAkkvQ8DXoDCh8CbX8An7-JWj0BaCWNePPg1quGXv5HKjtaf_zoEsLFH8eNLRAyedBLRv5X-Ajy0b-5330C1s-uqXpu7nzgGpZ6paod6Fanrql6l2olqluyXoPqv0yCr8C1fLVrVR1F6plrFu56i5Uy1m3ktVdqLa1rqWrOai2t67lqzmolreuJqw5qJa3rmasOaiWt66mrDmolreu5qwZqPYb6yu8Zb-yPuWt4ZtMqIy1Arz9KIS3p9FYemwsR9m9fTDFx8R5ur-c6sfTfSym9lN9XxzanT7zKY8AEPXAfKc6QCNwIDJrHfjnt1v04pn0vI_Qiy_Z-eGd9Ng8euGH1PM9Sz5M7ySYziT4If183yJI8J0Es3kEg48piC2C1LuTIJ9J8GMKEpvgvRYR8wjSjylILYLBvSbJZxL8mIKBRTC0THJ14XnKvV1qx88WvnnVGYBfnjs8F8fSd_nFen1acj971jreQ6_X8VO-P7yapoX-wOvKF8rNfO9-32y8BN_ei3ljM-awlTUFyBaRbz93hYI_6lpI-N40LfwuGBfSwG9Fq3eIbE8SlvxrhvO3KBt2qereFs_nyIvRcjNMxrADhx-gPXXvoWB7rJ6a2cub5nhj2PMzpXeYQdd1L7a5FnxFeExithArP4wDGuIgoIvdKiBRFGfL1I8pjVPCIhyl3M9zErNUsDBdFCvsYeLF5p_GPnVjHvqUc8qzmMVhFiHqiYoVpVuWfeU28mlRKNWJVbjExFuULBWlGrasMa7FMwxBhDEKtgu5Mm2ctHtSiHplobQ6oehCl8Ne93ots11IjdzfmXwSUHdVKiQ0OXS12Lci04KDMo-OWnSyXO20bhUi067eU6F3XepmTYXwo0GfCqeVzX8i0wg_DpwUwo8D5_8DAAD__3a5FDM">