[llvm] [AArch64][SME] Save VG for unwind info when changing streaming-mode (PR #83301)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 09:29:43 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Kerry McLaughlin (kmclaughlin-arm)
<details>
<summary>Changes</summary>
If a function requires any streaming-mode change, the vector granule (VG)
value must be stored to the stack, and the unwind info must also describe
the save of VG to this location.
This patch adds VG to the list of callee-saved registers and increases the
callee-saved stack size by 8 bytes if the function requires streaming-mode
changes. A new VG type is added to RegPairInfo, which is also used in the
epilogue to skip restoring the scratch register used to spill the VG value.
See https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst
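As a rough illustration of when the new spill kicks in, here is a minimal,
hypothetical IR example in the style of the updated tests (the function
names and the llc invocation in the comments are assumptions, and the
expected-output lines mirror the CHECK patterns in this diff rather than
exact compiler output):

```llvm
; Hypothetical example in the style of the updated tests; names and the
; exact llc invocation are assumptions, e.g.:
;   llc -mtriple=aarch64-linux-gnu -mattr=+sme < example.ll
declare void @normal_callee()

; A streaming caller invoking a non-streaming callee requires a
; streaming-mode change, so with this patch the prologue is expected to
; materialise VG with CNTD and spill it alongside LR, roughly:
;   cntd x9
;   ...
;   stp  x30, x9, [sp, #64]   // 16-byte Folded Spill
define void @streaming_caller() "aarch64_pstate_sm_enabled" nounwind {
  call void @normal_callee()
  ret void
}
```

For functions that are not marked nounwind, the same slot should
additionally be described in the unwind info (VG is DWARF register 46 in
the aadwarf64 document linked above), though the truncated diff below does
not show any CFI output.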
---
Patch is 166.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83301.diff
11 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+57-2)
- (modified) llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll (+8-5)
- (modified) llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll (+24-7)
- (modified) llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll (+3-2)
- (modified) llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll (+279-193)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll (+19-11)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-body.ll (+57-40)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll (+66-45)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-interface.ll (+26-13)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll (+5-4)
- (added) llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll (+641)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 5cc612e89162af..68564bc2ea7bf5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -321,7 +321,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
return false;
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
- if (AFI->hasSwiftAsyncContext())
+ if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
return false;
// If there are an odd number of GPRs before LR and FP in the CSRs list,
@@ -691,6 +691,9 @@ static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
continue;
+ if (!Info.isRestored())
+ continue;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1344,6 +1347,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
int CFAOffset = 0) {
unsigned NewOpc;
+
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
@@ -1651,6 +1655,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
LiveRegs.removeReg(AArch64::LR);
}
+ // If the function contains streaming mode changes, we expect the first
+ // instruction of MBB to be a CNTD. Move past this instruction if found.
+ if (AFI->hasStreamingModeChanges()) {
+ assert(MBBI->getOpcode() == AArch64::CNTD_XPiI && "Unexpected instruction");
+ MBBI = std::next(MBBI);
+ }
+
auto VerifyClobberOnExit = make_scope_exit([&]() {
if (NonFrameStart == MBB.end())
return;
@@ -2756,7 +2767,7 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
+ enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
RegPairInfo() = default;
@@ -2768,6 +2779,7 @@ struct RegPairInfo {
return 2;
case GPR:
case FPR64:
+ case VG:
return 8;
case ZPR:
case FPR128:
@@ -2833,6 +2845,8 @@ static void computeCalleeSaveRegisterPairs(
RPI.Type = RegPairInfo::ZPR;
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::PPR;
+ else if (RPI.Reg1 == AArch64::VG)
+ RPI.Type = RegPairInfo::VG;
else
llvm_unreachable("Unsupported register class.");
@@ -2860,6 +2874,7 @@ static void computeCalleeSaveRegisterPairs(
break;
case RegPairInfo::PPR:
case RegPairInfo::ZPR:
+ case RegPairInfo::VG:
break;
}
}
@@ -3047,7 +3062,23 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
Size = 2;
Alignment = Align(2);
break;
+ case RegPairInfo::VG:
+ StrOpc = AArch64::STRXui;
+ Size = 8;
+ Alignment = Align(8);
+ break;
}
+
+ if (Reg1 == AArch64::VG) {
+ // Find an available register to store value of VG to.
+ Reg1 = findScratchNonCalleeSaveRegister(&MBB);
+ assert(Reg1 != AArch64::NoRegister);
+
+ BuildMI(MBB, MBB.begin(), DL, TII.get(AArch64::CNTD_XPiI), Reg1)
+ .addImm(31)
+ .addImm(1);
+ }
+
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
@@ -3171,6 +3202,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
Size = 2;
Alignment = Align(2);
break;
+ case RegPairInfo::VG:
+ continue;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -3313,6 +3346,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
CSStackSize += RegSize;
}
+ // Increase the callee-saved stack size if the function has streaming mode
+ // changes, as we will need to spill the value of the VG register.
+ if (AFI->hasStreamingModeChanges())
+ CSStackSize += 8;
+
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
@@ -3449,6 +3487,23 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
}
+ // Insert VG into the list of CSRs, immediately before LR if saved.
+ if (AFI->hasStreamingModeChanges()) {
+ auto VGInfo = CalleeSavedInfo(AArch64::VG);
+ VGInfo.setRestored(false);
+ bool InsertBeforeLR = false;
+
+ for (unsigned I = 0; I < CSI.size(); I++)
+ if (CSI[I].getReg() == AArch64::LR) {
+ InsertBeforeLR = true;
+ CSI.insert(CSI.begin() + I, VGInfo);
+ break;
+ }
+
+ if (!InsertBeforeLR)
+ CSI.push_back(VGInfo);
+ }
+
for (auto &CS : CSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
index 3fa1ee5b9b0114..2a57e4edff8080 100644
--- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mattr=+sve < %s | FileCheck %s
; Verify that the following code can be compiled without +sme, because if the
; call is not entered in streaming-SVE mode at runtime, the codepath leading
@@ -10,11 +10,13 @@ target triple = "aarch64"
define void @streaming_compatible() #0 {
; CHECK-LABEL: streaming_compatible:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz w19, #0, .LBB0_2
@@ -26,11 +28,12 @@ define void @streaming_compatible() #0 {
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @non_streaming()
ret void
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 2a78012045ff42..5605556275a96c 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -11,12 +11,14 @@ declare double @normal_callee(double)
define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone {
; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee:
; CHECK-FISEL: // %bb.0: // %entry
+; CHECK-FISEL-NEXT: cntd x9
; CHECK-FISEL-NEXT: sub sp, sp, #96
; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-FISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstart sm
; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -37,12 +39,14 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
;
; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee:
; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: cntd x9
; CHECK-GISEL-NEXT: sub sp, sp, #96
; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-GISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstart sm
; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -70,12 +74,14 @@ entry:
define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: sub sp, sp, #96
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -102,12 +108,14 @@ entry:
define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: sub sp, sp, #112
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str x9, [sp, #104] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
@@ -166,11 +174,13 @@ define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noi
define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" {
; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr:
; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: blr x0
; CHECK-COMMON-NEXT: smstop sm
@@ -187,11 +197,13 @@ define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noin
define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone {
; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr:
; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: blr x0
; CHECK-COMMON-NEXT: smstop sm
@@ -325,12 +337,13 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-COMMON-LABEL: f128_call_sm:
; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: sub sp, sp, #112
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
@@ -386,12 +399,13 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm:
; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: sub sp, sp, #96
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
@@ -414,12 +428,14 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm_compat:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: sub sp, sp, #96
+; CHECK-COMMON-NEXT: cntd x9
+; CHECK-COMMON-NEXT: sub sp, sp, #112
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
@@ -436,13 +452,14 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: .LBB12_4:
-; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: add sp, sp, #96
+; CHECK-COMMON-NEXT: add sp, sp, #112
; CHECK-COMMON-NEXT: ret
%res = frem float %a, %b
ret float %res
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 9d635f0b88f191..c24585a971fb7a 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -121,13 +121,14 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
-; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
@@ -160,7 +161,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index d5bea725b6d14d..0fb85bb7e05a14 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -15,12 +15,13 @@ target triple = "aarch64-unknown-unknown-eabi-elf"
define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov x19, x1
@@ -32,8 +33,8 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folde...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83301