[llvm] [AArch64][SME] Enable split SVE for hazard padding in SVE CC functions (PR #166561)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 05:56:44 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
This patch enables `aarch64-split-sve-objects` to handle hazard padding in functions that use the SVE CC, even when there are no predicate spills/locals.
This improves codegen over the base hazard padding implementation: rather than placing the padding in the callee-save area, it is placed at the start of the ZPR area.
E.g., current lowering:
```
sub sp, sp, #1040
str x29, [sp, #1024] // 8-byte Folded Spill
addvl sp, sp, #-1
str z8, [sp] // 16-byte Folded Spill
sub sp, sp, #1040
```
New lowering:
```
str x29, [sp, #-16]! // 8-byte Folded Spill
sub sp, sp, #1024
addvl sp, sp, #-1
str z8, [sp] // 16-byte Folded Spill
sub sp, sp, #1040
```
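Roughly annotated (my reading of the new layout; in this example the final `sub` appears to cover 1024 bytes of hazard padding plus 16 bytes of locals):
```
str x29, [sp, #-16]! // GPR callee-save (x29)
sub sp, sp, #1024    // hazard padding at the start of the ZPR area
addvl sp, sp, #-1    // ZPR callee-save area
str z8, [sp]         // spill z8
sub sp, sp, #1040    // locals plus hazard padding separating ZPRs from GPR-accessed stack
```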
This also re-enables paired stores for GPRs (as the offsets no longer include the hazard padding).
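For example, in the updated `only_zpr_csr_vla` test below, three separate GPR stores become a pre-indexed `stp` plus a single `str`:
```
// Before:
str x29, [sp, #1024] // 8-byte Folded Spill
str x30, [sp, #1032] // 8-byte Folded Spill
str x19, [sp, #1040] // 8-byte Folded Spill

// After:
stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
str x19, [sp, #16]        // 8-byte Folded Spill
```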
---
Patch is 43.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166561.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+22-10)
- (modified) llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll (+7-10)
- (modified) llvm/test/CodeGen/AArch64/stack-hazard.ll (+372-202)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3ee4d58ca892c..ced61106532a3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2364,9 +2364,31 @@ void AArch64FrameLowering::determineStackHazardSlot(
AFI->setStackHazardSlotIndex(ID);
}
+ if (!AFI->hasStackHazardSlotIndex())
+ return;
+
// Determine if we should use SplitSVEObjects. This should only be used if
// there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
if (SplitSVEObjects) {
+ CallingConv::ID CC = MF.getFunction().getCallingConv();
+ if (AFI->isSVECC() || CC == CallingConv::AArch64_SVE_VectorCall) {
+ AFI->setSplitSVEObjects(true);
+ LLVM_DEBUG(dbgs() << "Using SplitSVEObjects for SVE CC function\n");
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Determining if SplitSVEObjects should be used in "
+ "non-SVE CC function...\n");
+
+ // If another calling convention is explicitly set FPRs can't be promoted to
+ // ZPR callee-saves.
+ if (!is_contained({CallingConv::C, CallingConv::Fast}, CC)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Calling convention is not supported with SplitSVEObjects\n");
+ return;
+ }
+
if (!HasPPRCSRs && !HasPPRStackObjects) {
LLVM_DEBUG(
dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
@@ -2380,16 +2402,6 @@ void AArch64FrameLowering::determineStackHazardSlot(
return;
}
- // If another calling convention is explicitly set FPRs can't be promoted to
- // ZPR callee-saves.
- if (!is_contained({CallingConv::C, CallingConv::Fast,
- CallingConv::AArch64_SVE_VectorCall},
- MF.getFunction().getCallingConv())) {
- LLVM_DEBUG(
- dbgs() << "Calling convention is not supported with SplitSVEObjects");
- return;
- }
-
[[maybe_unused]] const AArch64Subtarget &Subtarget =
MF.getSubtarget<AArch64Subtarget>();
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
index f65aec6665cec..9d8b077e9268e 100644
--- a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
@@ -839,11 +839,10 @@ define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) {
define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
; CHECK-LABEL: only_zpr_csr_vla:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #1056
-; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK-NEXT: add x29, sp, #1024
-; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
-; CHECK-NEXT: str x19, [sp, #1040] // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -870,11 +869,9 @@ define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT: sub sp, x29, #1024
-; CHECK-NEXT: ldr x19, [sp, #1040] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #1056
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
%alloc = alloca i8, i64 %n, align 1
call void (...) @llvm.fake.use(ptr %alloc)
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index 70874761b82ab..05450468f87a7 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -975,8 +975,8 @@ define i32 @svecc_csr_d8(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_psta
;
; CHECK64-LABEL: svecc_csr_d8:
; CHECK64: // %bb.0: // %entry
-; CHECK64-NEXT: sub sp, sp, #80
-; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK64-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #-1
; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT: sub sp, sp, #64
@@ -988,30 +988,50 @@ define i32 @svecc_csr_d8(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_psta
; CHECK64-NEXT: //NO_APP
; CHECK64-NEXT: add sp, sp, #64
; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #1
-; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
-; CHECK64-NEXT: add sp, sp, #80
+; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK64-NEXT: ret
;
-; CHECK1024-LABEL: svecc_csr_d8:
-; CHECK1024: // %bb.0: // %entry
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: addvl sp, sp, #-1
-; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill
-; CHECK1024-NEXT: sub sp, sp, #1024
-; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2064 + 8 * VG
-; CHECK1024-NEXT: .cfi_offset w29, -16
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
-; CHECK1024-NEXT: mov w0, wzr
-; CHECK1024-NEXT: //APP
-; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: add sp, sp, #1024
-; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload
-; CHECK1024-NEXT: addvl sp, sp, #1
-; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1040
-; CHECK1024-NEXT: ret
+; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8:
+; CHECK1024-NOSPLITSVE: // %bb.0: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1
+; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2064 + 8 * VG
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
+; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr
+; CHECK1024-NOSPLITSVE-NEXT: //APP
+; CHECK1024-NOSPLITSVE-NEXT: //NO_APP
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1
+; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: ret
+;
+; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8:
+; CHECK1024-SPLITSVE: // %bb.0: // %entry
+; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1
+; CHECK1024-SPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2064 + 8 * VG
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
+; CHECK1024-SPLITSVE-NEXT: mov w0, wzr
+; CHECK1024-SPLITSVE-NEXT: //APP
+; CHECK1024-SPLITSVE-NEXT: //NO_APP
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1
+; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ret
entry:
tail call void asm sideeffect "", "~{d8}"() #1
ret i32 0
@@ -1039,8 +1059,8 @@ define i32 @svecc_csr_d8d9(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_ps
;
; CHECK64-LABEL: svecc_csr_d8d9:
; CHECK64: // %bb.0: // %entry
-; CHECK64-NEXT: sub sp, sp, #80
-; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK64-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #-2
; CHECK64-NEXT: str z9, [sp] // 16-byte Folded Spill
; CHECK64-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -1055,33 +1075,56 @@ define i32 @svecc_csr_d8d9(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_ps
; CHECK64-NEXT: add sp, sp, #64
; CHECK64-NEXT: ldr z9, [sp] // 16-byte Folded Reload
; CHECK64-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #2
-; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
-; CHECK64-NEXT: add sp, sp, #80
+; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK64-NEXT: ret
;
-; CHECK1024-LABEL: svecc_csr_d8d9:
-; CHECK1024: // %bb.0: // %entry
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: addvl sp, sp, #-2
-; CHECK1024-NEXT: str z9, [sp] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: sub sp, sp, #1024
-; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG
-; CHECK1024-NEXT: .cfi_offset w29, -16
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1040
-; CHECK1024-NEXT: mov w0, wzr
-; CHECK1024-NEXT: //APP
-; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: add sp, sp, #1024
-; CHECK1024-NEXT: ldr z9, [sp] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: addvl sp, sp, #2
-; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1040
-; CHECK1024-NEXT: ret
+; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8d9:
+; CHECK1024-NOSPLITSVE: // %bb.0: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-2
+; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1040
+; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr
+; CHECK1024-NOSPLITSVE-NEXT: //APP
+; CHECK1024-NOSPLITSVE-NEXT: //NO_APP
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #2
+; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: ret
+;
+; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8d9:
+; CHECK1024-SPLITSVE: // %bb.0: // %entry
+; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2
+; CHECK1024-SPLITSVE-NEXT: str z9, [sp] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1040
+; CHECK1024-SPLITSVE-NEXT: mov w0, wzr
+; CHECK1024-SPLITSVE-NEXT: //APP
+; CHECK1024-SPLITSVE-NEXT: //NO_APP
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2
+; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ret
entry:
tail call void asm sideeffect "", "~{d8},~{d9}"() #1
ret i32 0
@@ -1108,8 +1151,8 @@ define i32 @svecc_csr_d8_allocd(double %d, <vscale x 4 x i32> %vs) "aarch64_psta
;
; CHECK64-LABEL: svecc_csr_d8_allocd:
; CHECK64: // %bb.0: // %entry
-; CHECK64-NEXT: sub sp, sp, #80
-; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK64-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #-1
; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT: sub sp, sp, #80
@@ -1122,31 +1165,52 @@ define i32 @svecc_csr_d8_allocd(double %d, <vscale x 4 x i32> %vs) "aarch64_psta
; CHECK64-NEXT: str d0, [sp, #72]
; CHECK64-NEXT: add sp, sp, #80
; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #1
-; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
-; CHECK64-NEXT: add sp, sp, #80
+; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK64-NEXT: ret
;
-; CHECK1024-LABEL: svecc_csr_d8_allocd:
-; CHECK1024: // %bb.0: // %entry
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: addvl sp, sp, #-1
-; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
-; CHECK1024-NEXT: .cfi_offset w29, -16
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
-; CHECK1024-NEXT: mov w0, wzr
-; CHECK1024-NEXT: //APP
-; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: str d0, [sp, #1032]
-; CHECK1024-NEXT: add sp, sp, #1040
-; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload
-; CHECK1024-NEXT: addvl sp, sp, #1
-; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1040
-; CHECK1024-NEXT: ret
+; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_allocd:
+; CHECK1024-NOSPLITSVE: // %bb.0: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1
+; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
+; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr
+; CHECK1024-NOSPLITSVE-NEXT: //APP
+; CHECK1024-NOSPLITSVE-NEXT: //NO_APP
+; CHECK1024-NOSPLITSVE-NEXT: str d0, [sp, #1032]
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1
+; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040
+; CHECK1024-NOSPLITSVE-NEXT: ret
+;
+; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8_allocd:
+; CHECK1024-SPLITSVE: // %bb.0: // %entry
+; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1
+; CHECK1024-SPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1040
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
+; CHECK1024-SPLITSVE-NEXT: mov w0, wzr
+; CHECK1024-SPLITSVE-NEXT: //APP
+; CHECK1024-SPLITSVE-NEXT: //NO_APP
+; CHECK1024-SPLITSVE-NEXT: str d0, [sp, #1032]
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1040
+; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1
+; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ret
entry:
%a = alloca double
tail call void asm sideeffect "", "~{d8}"() #1
@@ -1176,8 +1240,8 @@ define i32 @svecc_csr_d8_alloci64(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstat
;
; CHECK64-LABEL: svecc_csr_d8_alloci64:
; CHECK64: // %bb.0: // %entry
-; CHECK64-NEXT: sub sp, sp, #80
-; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK64-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #-1
; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT: sub sp, sp, #80
@@ -1191,32 +1255,54 @@ define i32 @svecc_csr_d8_alloci64(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstat
; CHECK64-NEXT: str x8, [sp, #8]
; CHECK64-NEXT: add sp, sp, #80
; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #1
-; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
-; CHECK64-NEXT: add sp, sp, #80
+; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK64-NEXT: ret
;
-; CHECK1024-LABEL: svecc_csr_d8_alloci64:
-; CHECK1024: // %bb.0: // %entry
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: addvl sp, sp, #-1
-; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
-; CHECK1024-NEXT: .cfi_offset w29, -16
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040
-; CHEC...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/166561