[llvm] d8e8ab7 - [AArch64][SME] Fix restoring callee-saves from FP with hazard padding (#143371)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 18 04:58:21 PDT 2025
Author: Benjamin Maxwell
Date: 2025-06-18T12:58:17+01:00
New Revision: d8e8ab79773f739c602c5869f80c6c5b5962c558
URL: https://github.com/llvm/llvm-project/commit/d8e8ab79773f739c602c5869f80c6c5b5962c558
DIFF: https://github.com/llvm/llvm-project/commit/d8e8ab79773f739c602c5869f80c6c5b5962c558.diff
LOG: [AArch64][SME] Fix restoring callee-saves from FP with hazard padding (#143371)
Currently, when hazard padding is enabled, a (fixed-size) hazard slot is
placed in the CS area, just after the frame record. The size of this
slot is part of the "CalleeSaveBaseToFrameRecordOffset". The SVE
epilogue emission code assumed this offset was always zero and
incorrectly set the stack pointer, resulting in all SVE registers
being reloaded from incorrect offsets.
```
| prev_lr |
| prev_fp |
| (a.k.a. "frame record") |
|-----------------------------------| <- fp(=x29)
| <hazard padding> |
|-----------------------------------| <- callee-saved base
| |
| callee-saved fp/simd/SVE regs |
| |
|-----------------------------------| <- SVE callee-save base
```
i.e. in the above diagram, the code assumed `fp == callee-saved base`.
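For illustration (not part of the patch), here is a minimal standalone C++
sketch of the offset arithmetic, assuming a 1024-byte hazard slot, an SVE
callee-save area of 18 vectors, and a 16-byte vector length. The old epilogue
(`addvl sp, x29, #-18`) lands a hazard-slot's worth of bytes above the SVE
callee-save base, whereas the fixed sequence (`sub x8, x29, #1024` followed by
`addvl sp, x8, #-18`) hits it exactly:
```
// Sketch only -- not LLVM code. Assumed numbers: 1024-byte hazard slot,
// 18 vector slots of SVE callee saves, one SVE vector == 16 bytes.
#include <cassert>
#include <cstdio>

int main() {
  const long long HazardSlot = 1024;          // CalleeSaveBaseToFrameRecordOffset
  const long long SVECalleeSavedVectors = 18; // addvl immediate in the epilogue
  const long long VectorBytes = 16;           // one possible SVE vector length

  const long long FP = 0; // place the frame record at address 0 for simplicity
  const long long CalleeSaveBase = FP - HazardSlot;
  const long long SVECalleeSaveBase =
      CalleeSaveBase - SVECalleeSavedVectors * VectorBytes;

  // Old (buggy) epilogue: addvl sp, x29, #-18 -- assumes fp == CS base.
  const long long BuggySP = FP - SVECalleeSavedVectors * VectorBytes;
  // Fixed epilogue: sub x8, x29, #1024; addvl sp, x8, #-18.
  const long long FixedSP = CalleeSaveBase - SVECalleeSavedVectors * VectorBytes;

  std::printf("SVE CS base %lld, buggy sp %lld, fixed sp %lld\n",
              SVECalleeSaveBase, BuggySP, FixedSP);
  assert(FixedSP == SVECalleeSaveBase);
  assert(BuggySP == SVECalleeSaveBase + HazardSlot); // off by the hazard slot
  return 0;
}
```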
Added:
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/test/CodeGen/AArch64/stack-hazard.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7ffe779f2408d..a71668e71c235 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2578,20 +2578,33 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
} else if (SVEStackSize) {
- // If we have stack realignment or variable sized objects on the stack,
- // restore the stack pointer from the frame pointer prior to SVE CSR
- // restoration.
- if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- // Set SP to start of SVE callee-save area from which they can
- // be reloaded. The code below will deallocate the stack space
- // space by moving FP -> SP.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
- StackOffset::getScalable(-CalleeSavedSize), TII,
+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+ // If we have stack realignment or variable-sized objects we must use the
+ // FP to restore SVE callee saves (as there is an unknown amount of
+ // data/padding between the SP and SVE CS area).
+ Register BaseForSVEDealloc =
+ (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+ : AArch64::SP;
+ if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+ Register CalleeSaveBase = AArch64::FP;
+ if (int64_t CalleeSaveBaseOffset =
+ AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+      // If we have a non-zero offset to the non-SVE CS base, we need to
+      // compute the base address by subtracting the offset in a temporary
+      // register first (to avoid briefly deallocating the SVE CS).
+ CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+ StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
MachineInstr::FrameDestroy);
}
- } else {
- if (AFI->getSVECalleeSavedStackSize()) {
+      // The code below will deallocate the stack space by moving the
+ // SP to the start of the SVE callee-save area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+ StackOffset::getScalable(-SVECalleeSavedSize), TII,
+ MachineInstr::FrameDestroy);
+ } else if (BaseForSVEDealloc == AArch64::SP) {
+ if (SVECalleeSavedSize) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index e169b199733bd..3a33405200132 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -3143,3 +3143,1176 @@ entry:
call void @bar(ptr noundef nonnull %b)
ret i32 0
}
+
+
+define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: svecc_call_dynamic_alloca:
+; CHECK0: // %bb.0: // %entry
+; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK0-NEXT: .cfi_def_cfa_offset 64
+; CHECK0-NEXT: cntd x9
+; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill
+; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill
+; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK0-NEXT: mov x29, sp
+; CHECK0-NEXT: .cfi_def_cfa w29, 64
+; CHECK0-NEXT: .cfi_offset w19, -8
+; CHECK0-NEXT: .cfi_offset w20, -16
+; CHECK0-NEXT: .cfi_offset w26, -24
+; CHECK0-NEXT: .cfi_offset w27, -32
+; CHECK0-NEXT: .cfi_offset w28, -40
+; CHECK0-NEXT: .cfi_offset w30, -56
+; CHECK0-NEXT: .cfi_offset w29, -64
+; CHECK0-NEXT: addvl sp, sp, #-18
+; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG
+; CHECK0-NEXT: mov w9, w0
+; CHECK0-NEXT: mov x8, sp
+; CHECK0-NEXT: mov w2, w1
+; CHECK0-NEXT: add x9, x9, #15
+; CHECK0-NEXT: mov x19, sp
+; CHECK0-NEXT: and x9, x9, #0x1fffffff0
+; CHECK0-NEXT: sub x8, x8, x9
+; CHECK0-NEXT: mov sp, x8
+; CHECK0-NEXT: //APP
+; CHECK0-NEXT: //NO_APP
+; CHECK0-NEXT: bl __arm_sme_state
+; CHECK0-NEXT: and x20, x0, #0x1
+; CHECK0-NEXT: .cfi_offset vg, -48
+; CHECK0-NEXT: tbz w20, #0, .LBB35_2
+; CHECK0-NEXT: // %bb.1: // %entry
+; CHECK0-NEXT: smstop sm
+; CHECK0-NEXT: .LBB35_2: // %entry
+; CHECK0-NEXT: mov x0, x8
+; CHECK0-NEXT: mov w1, #45 // =0x2d
+; CHECK0-NEXT: bl memset
+; CHECK0-NEXT: tbz w20, #0, .LBB35_4
+; CHECK0-NEXT: // %bb.3: // %entry
+; CHECK0-NEXT: smstart sm
+; CHECK0-NEXT: .LBB35_4: // %entry
+; CHECK0-NEXT: mov w0, #22647 // =0x5877
+; CHECK0-NEXT: movk w0, #59491, lsl #16
+; CHECK0-NEXT: .cfi_restore vg
+; CHECK0-NEXT: addvl sp, x29, #-18
+; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: .cfi_restore z8
+; CHECK0-NEXT: .cfi_restore z9
+; CHECK0-NEXT: .cfi_restore z10
+; CHECK0-NEXT: .cfi_restore z11
+; CHECK0-NEXT: .cfi_restore z12
+; CHECK0-NEXT: .cfi_restore z13
+; CHECK0-NEXT: .cfi_restore z14
+; CHECK0-NEXT: .cfi_restore z15
+; CHECK0-NEXT: mov sp, x29
+; CHECK0-NEXT: .cfi_def_cfa wsp, 64
+; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload
+; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload
+; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK0-NEXT: .cfi_def_cfa_offset 0
+; CHECK0-NEXT: .cfi_restore w19
+; CHECK0-NEXT: .cfi_restore w20
+; CHECK0-NEXT: .cfi_restore w26
+; CHECK0-NEXT: .cfi_restore w27
+; CHECK0-NEXT: .cfi_restore w28
+; CHECK0-NEXT: .cfi_restore w30
+; CHECK0-NEXT: .cfi_restore w29
+; CHECK0-NEXT: ret
+;
+; CHECK64-LABEL: svecc_call_dynamic_alloca:
+; CHECK64: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #128
+; CHECK64-NEXT: .cfi_def_cfa_offset 128
+; CHECK64-NEXT: cntd x9
+; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill
+; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill
+; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK64-NEXT: add x29, sp, #64
+; CHECK64-NEXT: .cfi_def_cfa w29, 64
+; CHECK64-NEXT: .cfi_offset w19, -8
+; CHECK64-NEXT: .cfi_offset w20, -16
+; CHECK64-NEXT: .cfi_offset w26, -24
+; CHECK64-NEXT: .cfi_offset w27, -32
+; CHECK64-NEXT: .cfi_offset w28, -40
+; CHECK64-NEXT: .cfi_offset w30, -56
+; CHECK64-NEXT: .cfi_offset w29, -64
+; CHECK64-NEXT: addvl sp, sp, #-18
+; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG
+; CHECK64-NEXT: sub sp, sp, #64
+; CHECK64-NEXT: mov w9, w0
+; CHECK64-NEXT: mov x8, sp
+; CHECK64-NEXT: mov w2, w1
+; CHECK64-NEXT: add x9, x9, #15
+; CHECK64-NEXT: mov x19, sp
+; CHECK64-NEXT: and x9, x9, #0x1fffffff0
+; CHECK64-NEXT: sub x8, x8, x9
+; CHECK64-NEXT: mov sp, x8
+; CHECK64-NEXT: //APP
+; CHECK64-NEXT: //NO_APP
+; CHECK64-NEXT: bl __arm_sme_state
+; CHECK64-NEXT: and x20, x0, #0x1
+; CHECK64-NEXT: .cfi_offset vg, -48
+; CHECK64-NEXT: tbz w20, #0, .LBB35_2
+; CHECK64-NEXT: // %bb.1: // %entry
+; CHECK64-NEXT: smstop sm
+; CHECK64-NEXT: .LBB35_2: // %entry
+; CHECK64-NEXT: mov x0, x8
+; CHECK64-NEXT: mov w1, #45 // =0x2d
+; CHECK64-NEXT: bl memset
+; CHECK64-NEXT: tbz w20, #0, .LBB35_4
+; CHECK64-NEXT: // %bb.3: // %entry
+; CHECK64-NEXT: smstart sm
+; CHECK64-NEXT: .LBB35_4: // %entry
+; CHECK64-NEXT: mov w0, #22647 // =0x5877
+; CHECK64-NEXT: movk w0, #59491, lsl #16
+; CHECK64-NEXT: .cfi_restore vg
+; CHECK64-NEXT: sub x8, x29, #64
+; CHECK64-NEXT: addvl sp, x8, #-18
+; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: .cfi_restore z8
+; CHECK64-NEXT: .cfi_restore z9
+; CHECK64-NEXT: .cfi_restore z10
+; CHECK64-NEXT: .cfi_restore z11
+; CHECK64-NEXT: .cfi_restore z12
+; CHECK64-NEXT: .cfi_restore z13
+; CHECK64-NEXT: .cfi_restore z14
+; CHECK64-NEXT: .cfi_restore z15
+; CHECK64-NEXT: sub sp, x29, #64
+; CHECK64-NEXT: .cfi_def_cfa wsp, 128
+; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload
+; CHECK64-NEXT: ldp x27, x26, [sp, #96] // 16-byte Folded Reload
+; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #128
+; CHECK64-NEXT: .cfi_def_cfa_offset 0
+; CHECK64-NEXT: .cfi_restore w19
+; CHECK64-NEXT: .cfi_restore w20
+; CHECK64-NEXT: .cfi_restore w26
+; CHECK64-NEXT: .cfi_restore w27
+; CHECK64-NEXT: .cfi_restore w28
+; CHECK64-NEXT: .cfi_restore w30
+; CHECK64-NEXT: .cfi_restore w29
+; CHECK64-NEXT: ret
+;
+; CHECK1024-LABEL: svecc_call_dynamic_alloca:
+; CHECK1024: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1088
+; CHECK1024-NEXT: .cfi_def_cfa_offset 1088
+; CHECK1024-NEXT: cntd x9
+; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill
+; CHECK1024-NEXT: add x29, sp, #1024
+; CHECK1024-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-NEXT: .cfi_offset w19, -8
+; CHECK1024-NEXT: .cfi_offset w20, -16
+; CHECK1024-NEXT: .cfi_offset w26, -24
+; CHECK1024-NEXT: .cfi_offset w27, -32
+; CHECK1024-NEXT: .cfi_offset w28, -40
+; CHECK1024-NEXT: .cfi_offset w30, -56
+; CHECK1024-NEXT: .cfi_offset w29, -64
+; CHECK1024-NEXT: addvl sp, sp, #-18
+; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG
+; CHECK1024-NEXT: sub sp, sp, #1024
+; CHECK1024-NEXT: mov w9, w0
+; CHECK1024-NEXT: mov x8, sp
+; CHECK1024-NEXT: mov w2, w1
+; CHECK1024-NEXT: add x9, x9, #15
+; CHECK1024-NEXT: mov x19, sp
+; CHECK1024-NEXT: and x9, x9, #0x1fffffff0
+; CHECK1024-NEXT: sub x8, x8, x9
+; CHECK1024-NEXT: mov sp, x8
+; CHECK1024-NEXT: //APP
+; CHECK1024-NEXT: //NO_APP
+; CHECK1024-NEXT: bl __arm_sme_state
+; CHECK1024-NEXT: and x20, x0, #0x1
+; CHECK1024-NEXT: .cfi_offset vg, -48
+; CHECK1024-NEXT: tbz w20, #0, .LBB35_2
+; CHECK1024-NEXT: // %bb.1: // %entry
+; CHECK1024-NEXT: smstop sm
+; CHECK1024-NEXT: .LBB35_2: // %entry
+; CHECK1024-NEXT: mov x0, x8
+; CHECK1024-NEXT: mov w1, #45 // =0x2d
+; CHECK1024-NEXT: bl memset
+; CHECK1024-NEXT: tbz w20, #0, .LBB35_4
+; CHECK1024-NEXT: // %bb.3: // %entry
+; CHECK1024-NEXT: smstart sm
+; CHECK1024-NEXT: .LBB35_4: // %entry
+; CHECK1024-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-NEXT: .cfi_restore vg
+; CHECK1024-NEXT: sub x8, x29, #1024
+; CHECK1024-NEXT: addvl sp, x8, #-18
+; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: .cfi_restore z8
+; CHECK1024-NEXT: .cfi_restore z9
+; CHECK1024-NEXT: .cfi_restore z10
+; CHECK1024-NEXT: .cfi_restore z11
+; CHECK1024-NEXT: .cfi_restore z12
+; CHECK1024-NEXT: .cfi_restore z13
+; CHECK1024-NEXT: .cfi_restore z14
+; CHECK1024-NEXT: .cfi_restore z15
+; CHECK1024-NEXT: sub sp, x29, #1024
+; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088
+; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1088
+; CHECK1024-NEXT: .cfi_def_cfa_offset 0
+; CHECK1024-NEXT: .cfi_restore w19
+; CHECK1024-NEXT: .cfi_restore w20
+; CHECK1024-NEXT: .cfi_restore w26
+; CHECK1024-NEXT: .cfi_restore w27
+; CHECK1024-NEXT: .cfi_restore w28
+; CHECK1024-NEXT: .cfi_restore w30
+; CHECK1024-NEXT: .cfi_restore w29
+; CHECK1024-NEXT: ret
+entry:
+ %ptr = alloca i8, i32 %P1
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2)
+ ret i32 -396142473
+}
+
+
+define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: svecc_call_realign:
+; CHECK0: // %bb.0: // %entry
+; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK0-NEXT: .cfi_def_cfa_offset 64
+; CHECK0-NEXT: cntd x9
+; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
+; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK0-NEXT: mov x29, sp
+; CHECK0-NEXT: .cfi_def_cfa w29, 64
+; CHECK0-NEXT: .cfi_offset w19, -8
+; CHECK0-NEXT: .cfi_offset w26, -16
+; CHECK0-NEXT: .cfi_offset w27, -24
+; CHECK0-NEXT: .cfi_offset w28, -32
+; CHECK0-NEXT: .cfi_offset w30, -56
+; CHECK0-NEXT: .cfi_offset w29, -64
+; CHECK0-NEXT: addvl sp, sp, #-18
+; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG
+; CHECK0-NEXT: sub x9, sp, #1024
+; CHECK0-NEXT: and sp, x9, #0xffffffffffffffe0
+; CHECK0-NEXT: mov w2, w1
+; CHECK0-NEXT: //APP
+; CHECK0-NEXT: //NO_APP
+; CHECK0-NEXT: bl __arm_sme_state
+; CHECK0-NEXT: and x19, x0, #0x1
+; CHECK0-NEXT: .cfi_offset vg, -48
+; CHECK0-NEXT: tbz w19, #0, .LBB36_2
+; CHECK0-NEXT: // %bb.1: // %entry
+; CHECK0-NEXT: smstop sm
+; CHECK0-NEXT: .LBB36_2: // %entry
+; CHECK0-NEXT: mov x0, sp
+; CHECK0-NEXT: mov w1, #45 // =0x2d
+; CHECK0-NEXT: bl memset
+; CHECK0-NEXT: tbz w19, #0, .LBB36_4
+; CHECK0-NEXT: // %bb.3: // %entry
+; CHECK0-NEXT: smstart sm
+; CHECK0-NEXT: .LBB36_4: // %entry
+; CHECK0-NEXT: mov w0, #22647 // =0x5877
+; CHECK0-NEXT: movk w0, #59491, lsl #16
+; CHECK0-NEXT: .cfi_restore vg
+; CHECK0-NEXT: addvl sp, x29, #-18
+; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: .cfi_restore z8
+; CHECK0-NEXT: .cfi_restore z9
+; CHECK0-NEXT: .cfi_restore z10
+; CHECK0-NEXT: .cfi_restore z11
+; CHECK0-NEXT: .cfi_restore z12
+; CHECK0-NEXT: .cfi_restore z13
+; CHECK0-NEXT: .cfi_restore z14
+; CHECK0-NEXT: .cfi_restore z15
+; CHECK0-NEXT: mov sp, x29
+; CHECK0-NEXT: .cfi_def_cfa wsp, 64
+; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK0-NEXT: .cfi_def_cfa_offset 0
+; CHECK0-NEXT: .cfi_restore w19
+; CHECK0-NEXT: .cfi_restore w26
+; CHECK0-NEXT: .cfi_restore w27
+; CHECK0-NEXT: .cfi_restore w28
+; CHECK0-NEXT: .cfi_restore w30
+; CHECK0-NEXT: .cfi_restore w29
+; CHECK0-NEXT: ret
+;
+; CHECK64-LABEL: svecc_call_realign:
+; CHECK64: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #128
+; CHECK64-NEXT: .cfi_def_cfa_offset 128
+; CHECK64-NEXT: cntd x9
+; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill
+; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill
+; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill
+; CHECK64-NEXT: add x29, sp, #64
+; CHECK64-NEXT: .cfi_def_cfa w29, 64
+; CHECK64-NEXT: .cfi_offset w19, -16
+; CHECK64-NEXT: .cfi_offset w26, -24
+; CHECK64-NEXT: .cfi_offset w27, -32
+; CHECK64-NEXT: .cfi_offset w28, -40
+; CHECK64-NEXT: .cfi_offset w30, -56
+; CHECK64-NEXT: .cfi_offset w29, -64
+; CHECK64-NEXT: addvl sp, sp, #-18
+; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG
+; CHECK64-NEXT: sub x9, sp, #1088
+; CHECK64-NEXT: and sp, x9, #0xffffffffffffffe0
+; CHECK64-NEXT: mov w2, w1
+; CHECK64-NEXT: //APP
+; CHECK64-NEXT: //NO_APP
+; CHECK64-NEXT: bl __arm_sme_state
+; CHECK64-NEXT: and x19, x0, #0x1
+; CHECK64-NEXT: .cfi_offset vg, -48
+; CHECK64-NEXT: tbz w19, #0, .LBB36_2
+; CHECK64-NEXT: // %bb.1: // %entry
+; CHECK64-NEXT: smstop sm
+; CHECK64-NEXT: .LBB36_2: // %entry
+; CHECK64-NEXT: mov x0, sp
+; CHECK64-NEXT: mov w1, #45 // =0x2d
+; CHECK64-NEXT: bl memset
+; CHECK64-NEXT: tbz w19, #0, .LBB36_4
+; CHECK64-NEXT: // %bb.3: // %entry
+; CHECK64-NEXT: smstart sm
+; CHECK64-NEXT: .LBB36_4: // %entry
+; CHECK64-NEXT: mov w0, #22647 // =0x5877
+; CHECK64-NEXT: movk w0, #59491, lsl #16
+; CHECK64-NEXT: .cfi_restore vg
+; CHECK64-NEXT: sub x8, x29, #64
+; CHECK64-NEXT: addvl sp, x8, #-18
+; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: .cfi_restore z8
+; CHECK64-NEXT: .cfi_restore z9
+; CHECK64-NEXT: .cfi_restore z10
+; CHECK64-NEXT: .cfi_restore z11
+; CHECK64-NEXT: .cfi_restore z12
+; CHECK64-NEXT: .cfi_restore z13
+; CHECK64-NEXT: .cfi_restore z14
+; CHECK64-NEXT: .cfi_restore z15
+; CHECK64-NEXT: sub sp, x29, #64
+; CHECK64-NEXT: .cfi_def_cfa wsp, 128
+; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload
+; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload
+; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #128
+; CHECK64-NEXT: .cfi_def_cfa_offset 0
+; CHECK64-NEXT: .cfi_restore w19
+; CHECK64-NEXT: .cfi_restore w26
+; CHECK64-NEXT: .cfi_restore w27
+; CHECK64-NEXT: .cfi_restore w28
+; CHECK64-NEXT: .cfi_restore w30
+; CHECK64-NEXT: .cfi_restore w29
+; CHECK64-NEXT: ret
+;
+; CHECK1024-LABEL: svecc_call_realign:
+; CHECK1024: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1088
+; CHECK1024-NEXT: .cfi_def_cfa_offset 1088
+; CHECK1024-NEXT: cntd x9
+; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
+; CHECK1024-NEXT: add x29, sp, #1024
+; CHECK1024-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-NEXT: .cfi_offset w19, -16
+; CHECK1024-NEXT: .cfi_offset w26, -24
+; CHECK1024-NEXT: .cfi_offset w27, -32
+; CHECK1024-NEXT: .cfi_offset w28, -40
+; CHECK1024-NEXT: .cfi_offset w30, -56
+; CHECK1024-NEXT: .cfi_offset w29, -64
+; CHECK1024-NEXT: addvl sp, sp, #-18
+; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG
+; CHECK1024-NEXT: sub x9, sp, #2048
+; CHECK1024-NEXT: and sp, x9, #0xffffffffffffffe0
+; CHECK1024-NEXT: mov w2, w1
+; CHECK1024-NEXT: //APP
+; CHECK1024-NEXT: //NO_APP
+; CHECK1024-NEXT: bl __arm_sme_state
+; CHECK1024-NEXT: and x19, x0, #0x1
+; CHECK1024-NEXT: .cfi_offset vg, -48
+; CHECK1024-NEXT: tbz w19, #0, .LBB36_2
+; CHECK1024-NEXT: // %bb.1: // %entry
+; CHECK1024-NEXT: smstop sm
+; CHECK1024-NEXT: .LBB36_2: // %entry
+; CHECK1024-NEXT: mov x0, sp
+; CHECK1024-NEXT: mov w1, #45 // =0x2d
+; CHECK1024-NEXT: bl memset
+; CHECK1024-NEXT: tbz w19, #0, .LBB36_4
+; CHECK1024-NEXT: // %bb.3: // %entry
+; CHECK1024-NEXT: smstart sm
+; CHECK1024-NEXT: .LBB36_4: // %entry
+; CHECK1024-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-NEXT: .cfi_restore vg
+; CHECK1024-NEXT: sub x8, x29, #1024
+; CHECK1024-NEXT: addvl sp, x8, #-18
+; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: .cfi_restore z8
+; CHECK1024-NEXT: .cfi_restore z9
+; CHECK1024-NEXT: .cfi_restore z10
+; CHECK1024-NEXT: .cfi_restore z11
+; CHECK1024-NEXT: .cfi_restore z12
+; CHECK1024-NEXT: .cfi_restore z13
+; CHECK1024-NEXT: .cfi_restore z14
+; CHECK1024-NEXT: .cfi_restore z15
+; CHECK1024-NEXT: sub sp, x29, #1024
+; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088
+; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1088
+; CHECK1024-NEXT: .cfi_def_cfa_offset 0
+; CHECK1024-NEXT: .cfi_restore w19
+; CHECK1024-NEXT: .cfi_restore w26
+; CHECK1024-NEXT: .cfi_restore w27
+; CHECK1024-NEXT: .cfi_restore w28
+; CHECK1024-NEXT: .cfi_restore w30
+; CHECK1024-NEXT: .cfi_restore w29
+; CHECK1024-NEXT: ret
+entry:
+ %ptr = alloca i8, i32 1000, align 32
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2)
+ ret i32 -396142473
+}
+
+
+define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: svecc_call_dynamic_and_scalable_alloca:
+; CHECK0: // %bb.0: // %entry
+; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Folded Spill
+; CHECK0-NEXT: mov x29, sp
+; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill
+; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK0-NEXT: addvl sp, sp, #-18
+; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK0-NEXT: sub sp, sp, #48
+; CHECK0-NEXT: addvl sp, sp, #-1
+; CHECK0-NEXT: mov x19, sp
+; CHECK0-NEXT: .cfi_def_cfa w29, 64
+; CHECK0-NEXT: .cfi_offset w19, -8
+; CHECK0-NEXT: .cfi_offset w20, -16
+; CHECK0-NEXT: .cfi_offset w26, -24
+; CHECK0-NEXT: .cfi_offset w27, -32
+; CHECK0-NEXT: .cfi_offset w28, -48
+; CHECK0-NEXT: .cfi_offset w30, -56
+; CHECK0-NEXT: .cfi_offset w29, -64
+; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG
+; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG
+; CHECK0-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK0-NEXT: ubfiz x8, x0, #2, #32
+; CHECK0-NEXT: mov x9, sp
+; CHECK0-NEXT: add x8, x8, #15
+; CHECK0-NEXT: and x8, x8, #0x7fffffff0
+; CHECK0-NEXT: sub x20, x9, x8
+; CHECK0-NEXT: mov sp, x20
+; CHECK0-NEXT: //APP
+; CHECK0-NEXT: //NO_APP
+; CHECK0-NEXT: add x0, x19, #8
+; CHECK0-NEXT: bl bar
+; CHECK0-NEXT: addvl x0, x29, #-19
+; CHECK0-NEXT: bl bar
+; CHECK0-NEXT: mov x0, x20
+; CHECK0-NEXT: bl bar
+; CHECK0-NEXT: mov w0, #22647 // =0x5877
+; CHECK0-NEXT: movk w0, #59491, lsl #16
+; CHECK0-NEXT: addvl sp, x29, #-18
+; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK0-NEXT: mov sp, x29
+; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload
+; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload
+; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK0-NEXT: ret
+;
+; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca:
+; CHECK64: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #128
+; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK64-NEXT: add x29, sp, #64
+; CHECK64-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill
+; CHECK64-NEXT: stp x26, x20, [sp, #96] // 16-byte Folded Spill
+; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill
+; CHECK64-NEXT: addvl sp, sp, #-18
+; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #112
+; CHECK64-NEXT: addvl sp, sp, #-1
+; CHECK64-NEXT: mov x19, sp
+; CHECK64-NEXT: .cfi_def_cfa w29, 64
+; CHECK64-NEXT: .cfi_offset w19, -16
+; CHECK64-NEXT: .cfi_offset w20, -24
+; CHECK64-NEXT: .cfi_offset w26, -32
+; CHECK64-NEXT: .cfi_offset w27, -40
+; CHECK64-NEXT: .cfi_offset w28, -48
+; CHECK64-NEXT: .cfi_offset w30, -56
+; CHECK64-NEXT: .cfi_offset w29, -64
+; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG
+; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG
+; CHECK64-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK64-NEXT: ubfiz x8, x0, #2, #32
+; CHECK64-NEXT: mov x9, sp
+; CHECK64-NEXT: add x8, x8, #15
+; CHECK64-NEXT: and x8, x8, #0x7fffffff0
+; CHECK64-NEXT: sub x20, x9, x8
+; CHECK64-NEXT: mov sp, x20
+; CHECK64-NEXT: //APP
+; CHECK64-NEXT: //NO_APP
+; CHECK64-NEXT: add x0, x19, #8
+; CHECK64-NEXT: bl bar
+; CHECK64-NEXT: sub x0, x29, #64
+; CHECK64-NEXT: addvl x0, x0, #-19
+; CHECK64-NEXT: bl bar
+; CHECK64-NEXT: mov x0, x20
+; CHECK64-NEXT: bl bar
+; CHECK64-NEXT: mov w0, #22647 // =0x5877
+; CHECK64-NEXT: sub x8, x29, #64
+; CHECK64-NEXT: movk w0, #59491, lsl #16
+; CHECK64-NEXT: addvl sp, x8, #-18
+; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK64-NEXT: sub sp, x29, #64
+; CHECK64-NEXT: ldp x20, x19, [sp, #104] // 16-byte Folded Reload
+; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK64-NEXT: ldp x27, x26, [sp, #88] // 16-byte Folded Reload
+; CHECK64-NEXT: ldp x30, x28, [sp, #72] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #128
+; CHECK64-NEXT: ret
+;
+; CHECK1024-LABEL: svecc_call_dynamic_and_scalable_alloca:
+; CHECK1024: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1088
+; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NEXT: add x29, sp, #1024
+; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
+; CHECK1024-NEXT: addvl sp, sp, #-18
+; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK1024-NEXT: sub sp, sp, #1072
+; CHECK1024-NEXT: addvl sp, sp, #-1
+; CHECK1024-NEXT: mov x19, sp
+; CHECK1024-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-NEXT: .cfi_offset w19, -16
+; CHECK1024-NEXT: .cfi_offset w20, -24
+; CHECK1024-NEXT: .cfi_offset w26, -32
+; CHECK1024-NEXT: .cfi_offset w27, -40
+; CHECK1024-NEXT: .cfi_offset w28, -48
+; CHECK1024-NEXT: .cfi_offset w30, -56
+; CHECK1024-NEXT: .cfi_offset w29, -64
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG
+; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG
+; CHECK1024-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK1024-NEXT: ubfiz x8, x0, #2, #32
+; CHECK1024-NEXT: mov x9, sp
+; CHECK1024-NEXT: add x8, x8, #15
+; CHECK1024-NEXT: and x8, x8, #0x7fffffff0
+; CHECK1024-NEXT: sub x20, x9, x8
+; CHECK1024-NEXT: mov sp, x20
+; CHECK1024-NEXT: //APP
+; CHECK1024-NEXT: //NO_APP
+; CHECK1024-NEXT: add x0, x19, #8
+; CHECK1024-NEXT: bl bar
+; CHECK1024-NEXT: sub x0, x29, #1024
+; CHECK1024-NEXT: addvl x0, x0, #-19
+; CHECK1024-NEXT: bl bar
+; CHECK1024-NEXT: mov x0, x20
+; CHECK1024-NEXT: bl bar
+; CHECK1024-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-NEXT: sub x8, x29, #1024
+; CHECK1024-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-NEXT: addvl sp, x8, #-18
+; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-NEXT: sub sp, x29, #1024
+; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1088
+; CHECK1024-NEXT: ret
+entry:
+ %a = alloca i32, i32 10
+ %b = alloca <vscale x 4 x i32>
+ %c = alloca i32, i32 %P1, align 4
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ call void @bar(ptr noundef nonnull %a)
+ call void @bar(ptr noundef nonnull %b)
+ call void @bar(ptr noundef nonnull %c)
+ ret i32 -396142473
+}