<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/67230">67230</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [AArch64] Large number of unexpected spills
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          vfdff
      </td>
    </tr>
</table>

<pre>
* **Test**: **clang -march=armv8.2-a+sve -O3 -S test.c -ffast-math -msve-vector-bits=256**
```
#include <arm_sve.h>

/* svfloat64_t given a fixed length of 256 bits by the arm_sve_vector_bits
   attribute. The 256 here matches the -msve-vector-bits=256 flag on the
   compile command used for this report. */
typedef svfloat64_t fvec64 __attribute__((arm_sve_vector_bits(256)));
/* Second name for fvec64; presumably chosen to mirror the x86 AVX type of
   the same name (not confirmed by this test case). */
typedef fvec64 __m256d;

/* Declared here but not defined in this test case. Takes a non-const
   pointer to a vector, so the callee may read or modify the pointee. */
void foo (__m256d *val);

/* Reproducer for the reported spills: multiplies a by b and passes the
   address of the result to foo(), once per loop iteration.

   a, b : vector operands, passed by value.
   n    : iteration count.

   Returns res as it stands after the last call to foo().
   NOTE(review): when n is zero or negative the loop body never runs and
   res is returned without ever being assigned. Left as is so the test
   case stays identical to the one that produced the quoted output. */
__m256d _mm256_mul_pd(__m256d a, __m256d b, int n)
{
  __m256d res;

 for (int i=0; i<n; i++) {
   /* a and b are not modified inside the loop, so every iteration computes
      the same product; svptrue_b64() supplies the governing predicate. */
   res = svmul_f64_z(svptrue_b64(), a, b);
   /* The address of res escapes to an external function on each pass. */
   foo (&res);
 }
 return res;
}
```

* **clang's output**: The Z and P registers are callee-saved registers, so shouldn't it be unnecessary to store them before the call and restore them after it?
```
// %bb.1:                               // %for.body.lr.ph
        stp     x29, x30, [sp, #-48]!           // 16-byte Folded Spill
        .cfi_def_cfa_offset 48
        str     x28, [sp, #16] // 8-byte Folded Spill
        stp     x20, x19, [sp, #32]             // 16-byte Folded Spill
        mov     x29, sp
        .cfi_def_cfa w29, 48
        .cfi_offset w19, -8
        .cfi_offset w20, -16
        .cfi_offset w28, -32
        .cfi_offset w30, -40
        .cfi_offset w29, -48
        addvl   sp, sp, #-18
        str     p15, [sp, #4, mul vl]           // 2-byte Folded Spill
        str     p14, [sp, #5, mul vl]           // 2-byte Folded Spill
        str     p13, [sp, #6, mul vl] // 2-byte Folded Spill
        str     p12, [sp, #7, mul vl]           // 2-byte Folded Spill
        str     p11, [sp, #8, mul vl]           // 2-byte Folded Spill
        str     p10, [sp, #9, mul vl]           // 2-byte Folded Spill
        str     p9, [sp, #10, mul vl]           // 2-byte Folded Spill
        str     p8, [sp, #11, mul vl]           // 2-byte Folded Spill
        str     p7, [sp, #12, mul vl]           // 2-byte Folded Spill
        str     p6, [sp, #13, mul vl]           // 2-byte Folded Spill
        str     p5, [sp, #14, mul vl]           // 2-byte Folded Spill
        str     p4, [sp, #15, mul vl]           // 2-byte Folded Spill
        str     z23, [sp, #2, mul vl]           // 16-byte Folded Spill
        str     z22, [sp, #3, mul vl]           // 16-byte Folded Spill
        str     z21, [sp, #4, mul vl]           // 16-byte Folded Spill
        str     z20, [sp, #5, mul vl]           // 16-byte Folded Spill
        str     z19, [sp, #6, mul vl]           // 16-byte Folded Spill
        str     z18, [sp, #7, mul vl]           // 16-byte Folded Spill
        str     z17, [sp, #8, mul vl]           // 16-byte Folded Spill
        str     z16, [sp, #9, mul vl]           // 16-byte Folded Spill
        str     z15, [sp, #10, mul vl]          // 16-byte Folded Spill
        str     z14, [sp, #11, mul vl]          // 16-byte Folded Spill
        str     z13, [sp, #12, mul vl]          // 16-byte Folded Spill
        str     z12, [sp, #13, mul vl]          // 16-byte Folded Spill
        str     z11, [sp, #14, mul vl]          // 16-byte Folded Spill
        str     z10, [sp, #15, mul vl]          // 16-byte Folded Spill
        str     z9, [sp, #16, mul vl]           // 16-byte Folded Spill
        str     z8, [sp, #17, mul vl]           // 16-byte Folded Spill
        .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8  @ cfa - 48 - 8 * VG
        .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
        .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10  @ cfa - 48 - 24 * VG
        .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11  @ cfa - 48 - 32 * VG
        .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12  @ cfa - 48 - 40 * VG
        .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13  @ cfa - 48 - 48 * VG
        .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14  @ cfa - 48 - 56 * VG
        .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15  @ cfa - 48 - 64 * VG
        addvl   sp, sp, #-1
        sub     sp, sp, #32
        fmul    z0.d, z0.d, z1.d
        mov     w19, w0
        mov     x20, sp
        str     z0, [x29, #-19, mul vl]         // 16-byte Folded Spill
.LBB0_2:                                // %for.body
 // =>This Inner Loop Header: Depth=1
        ldr     z0, [x29, #-19, mul vl]         // 16-byte Folded Reload
        mov     x0, sp
        st1d    { z0.d }, p0, [x20]
        bl      foo
        ptrue   p0.d
...
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysWEuPqzYY_TXO5hMIbENgkUUyudNWulKr3qsuukEGmwkVL9mGydxfXxnIY-LJJMxDGQx89vHxMecDmylVPNVCrFCwQcF2wTq9a-Sqz3meL9KGv6wQXoMWSiOyBkB4jfA6K1n9BOBUTGY7RLZMVn3kYochvFG9AOdPAuD8GJq5GTh5zpR2KqZ34FSqF04vMt1IJy20QmSLg3DERd4WeWsUetNvvMSkqLOy4wIQeWCySlQv3B0i36b4cNQvreAiB9XnZcN0SBMNeS-ykEKSMK1lkXZaJAnCEcLRhJKMPJKBB44GHvH0I5vXwEewCgchP4bHY98UHPKmAYSjqYaRqmflOdR4PMSTypRJ1ZVJy8_aMYQfDt1Aai6KWkNtgEaU5QQHx1pSqItOIG-kIWOaFohsPUQ25uShHk_wZvjFcIZmYACRLajekMpDmvxCOFJ9q2UnkjSkg3ixoTSQTM8HB3AQAOHQEDqPoeV2OpNCd7J-zfgQvJz4afrX508dwksFTafbTk_PDFnDz52Af0GKp0JpIRWwmsNf59dSQMbKUghQrBfHiBmEakDvxAvwpkZ4qaEWgoPSjRSQitwUeje2HmClGGMs10KOtxF5vPLgPiL8CAgHaer6g33e_TvVzxvpGu-5pXTb3UGNWOkWefF-nIE98UyBgo1qhxNMHBqhYIuwb4P6oZO-aAGPTckFhx9tUZZHXDfLi4SLPMlyljR5roQGGp11K4duo8v-_BAF20MP0TsdHIgPjPd-fAlEsAF6Q4t3aVdNf5JDtW8OB57H8NlwhvA0yueRi3MlOhJ2_PBKeFDEIfjt8DhBDvWutB67PmPGOO9LI1c7jWiaVv9yLlo_uJSQmrLqSujL11pOSuJ3p2cEpZegwReAkkvQ8DXoDCh8CbX8An7-JWj0BaCWNePPg1quGXv5HKjtaf_zoEsLFH8eNLRAyedBLRv5X-Ajy0b-5330C1s-uqXpu7nzgGpZ6paod6Fanrql6l2olqluyXoPqv0yCr8C1fLVrVR1F6plrFu56i5Uy1m3ktVdqLa1rqWrOai2t67lqzmolreuJqw5qJa3rmasOaiWt66mrDmolreu5qwZqPYb6yu8Zb-yPuWt4ZtMqIy1Arz9KIS3p9FYemwsR9m9fTDFx8R5ur-c6sfTfSym9lN9XxzanT7zKY8AEPXAfKc6QCNwIDJrHfjnt1v04pn0vI_Qiy_Z-eGd9Ng8euGH1PM9Sz5M7ySYziT4If183yJI8J0Es3kEg48piC2C1LuTIJ9J8GMKEpvgvRYR8wjSjylILYLBvSbJZxL8mIKBRTC0THJ14XnKvV1qx88WvnnVGYBfnjs8F8fSd_nFen1acj971jreQ6_X8VO-P7yapoX-wOvKF8rNfO9-32y8BN_ei3ljM-awlTUFyBaRbz93hYI_6lpI-N40LfwuGBfSwG9Fq3eIbE8SlvxrhvO3KBt2qereFs_nyIvRcjNMxrADhx-gPXXvoWB7rJ6a2cub5nhj2PMzpXeYQdd1L7a5FnxFeExithArP4wDGuIgoIvdKiBRFGfL1I8pjVPCIhyl3M9zErNUsDBdFCvsYeLF5p_GPnVjHvqUc8qzmMVhFiHqiYoVpVuWfeU28mlRKNWJVbjExFuULBWlGrasMa7FMwxBhDEKtgu5Mm2ctHtSiHplobQ6oehCl8Ne93ots11IjdzfmXwSUHdVKiQ0OXS12Lci04KDMo-OWnSyXO20bhUi067eU6F3XepmTYXwo0GfCqeVzX8i0wg_DpwUwo8D5_8DAAD__3a5FDM">