[llvm] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #123752)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 09:54:00 PST 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/123752
>From 14d2b4c88e35d965bbb185a70631ea39f1c27c6e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 13 Jan 2025 16:15:53 +0000
Subject: [PATCH 1/5] [AArch64][SME] Spill p-regs as z-regs when streaming
hazards are possible
This patch adds a new option `-aarch64-enable-zpr-predicate-spills`
(which is disabled by default). This option replaces predicate spills
with vector spills in streaming[-compatible] functions.
For example:
```
str p8, [sp, #7, mul vl] // 2-byte Folded Spill
// ...
ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
```
Becomes:
```
mov z0.b, p8/z, #1
str z0, [sp] // 16-byte Folded Spill
// ...
ldr z0, [sp] // 16-byte Folded Reload
ptrue p4.b
cmpne p8.b, p4/z, z0.b, #0
```
This is done to avoid streaming memory hazards between FPR/vector and
predicate spills, which currently occupy the same stack area even when
the `-aarch64-stack-hazard-size` flag is set.
This is implemented with two new pseudos SPILL_PPR_TO_ZPR_SLOT_PSEUDO
and FILL_PPR_FROM_ZPR_SLOT_PSEUDO. The expansion of these pseudos
handles scavenging the required registers (z0 in the above example) and,
in the worst case, spilling a register to an emergency stack slot. The
condition flags are also preserved around the `cmpne` in case they are
live at the expansion point.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 335 +++++-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 16 +-
.../Target/AArch64/AArch64RegisterInfo.cpp | 4 +-
llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 2 +-
.../lib/Target/AArch64/AArch64RegisterInfo.td | 11 +-
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 22 +
llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 +
llvm/lib/Target/AArch64/SMEInstrFormats.td | 14 +
.../AArch64/spill-fill-zpr-predicates.mir | 1035 +++++++++++++++++
.../AArch64/ssve-stack-hazard-remarks.ll | 13 +-
10 files changed, 1444 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index eabe64361938b4..64c3ecaf21ea31 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1630,6 +1630,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_PXI:
+ case AArch64::PTRUE_B:
+ case AArch64::CPY_ZPzI_B:
+ case AArch64::CMPNE_PPzZI_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
@@ -3261,7 +3264,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
break;
case RegPairInfo::PPR:
- StrOpc = AArch64::STR_PXI;
+ StrOpc =
+ Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI;
break;
case RegPairInfo::VG:
StrOpc = AArch64::STRXui;
@@ -3490,7 +3494,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
break;
case RegPairInfo::PPR:
- LdrOpc = AArch64::LDR_PXI;
+ LdrOpc = Size == 16 ? AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO
+ : AArch64::LDR_PXI;
break;
case RegPairInfo::VG:
continue;
@@ -3716,6 +3721,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
continue;
}
+ // Always save P4 when PPR spills are ZPR-sized and a predicate in p8-p15 is
+ // spilled. If all of p0-p3 are used as return values, p4 must be free
+ // to reload p8-p15.
+ if (RegInfo->getSpillSize(AArch64::PPRRegClass) == 16 &&
+ AArch64::PPR_p8to15RegClass.contains(Reg)) {
+ SavedRegs.set(AArch64::P4);
+ }
+
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
@@ -4155,8 +4168,318 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
true);
}
+/// Returns the first register in \p ScavengeableRegs that is available in
+/// \p UsedRegs, or AArch64::NoRegister if none of them are free.
+static Register tryScavengeRegister(LiveRegUnits const &UsedRegs,
+ BitVector const &ScavengeableRegs) {
+ for (auto Reg : ScavengeableRegs.set_bits()) {
+ if (UsedRegs.available(Reg))
+ return Reg;
+ }
+ return AArch64::NoRegister;
+}
+
+/// Sets FrameSetup/FrameDestroy on every instruction in \p MachineInstrs
+/// when \p SourceMI carries that flag (flags are only added, never cleared).
+static void propagateFrameFlags(MachineInstr &SourceMI,
+ ArrayRef<MachineInstr *> MachineInstrs) {
+ for (MachineInstr *MI : MachineInstrs) {
+ if (SourceMI.getFlag(MachineInstr::FrameSetup))
+ MI->setFlag(MachineInstr::FrameSetup);
+ if (SourceMI.getFlag(MachineInstr::FrameDestroy))
+ MI->setFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+/// RAII helper class for scavenging or spilling a register. On construction
+/// attempts to find a free register of class \p RC (given \p UsedRegs and \p
+/// AllocatableRegs); if no register can be found, spills \p SpillCandidate to
+/// \p MaybeSpillFI to free a register. The freed register is returned via the
+/// \p FreeReg output parameter. On destruction, if there was a spill, the
+/// spilled register is reloaded. The spilling and scavenging is only valid at
+/// the insertion point \p MBBI; this class should _not_ be used in places that
+/// create or manipulate basic blocks, moving the expected insertion point.
+struct ScopedScavengeOrSpill {
+ ScopedScavengeOrSpill(const ScopedScavengeOrSpill &) = delete;
+ ScopedScavengeOrSpill(ScopedScavengeOrSpill &&) = delete;
+
+ ScopedScavengeOrSpill(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register &FreeReg,
+ Register SpillCandidate, const TargetRegisterClass &RC,
+ LiveRegUnits const &UsedRegs,
+ BitVector const &AllocatableRegs,
+ std::optional<int> &MaybeSpillFI)
+ : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
+ *MF.getSubtarget().getInstrInfo())),
+ TRI(*MF.getSubtarget().getRegisterInfo()) {
+ FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
+ if (FreeReg != AArch64::NoRegister)
+ return;
+ if (!MaybeSpillFI) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlign(RC));
+ }
+ FreeReg = SpilledReg = SpillCandidate;
+ SpillFI = *MaybeSpillFI;
+ TII.storeRegToStackSlot(MBB, MBBI, SpilledReg, false, SpillFI, &RC, &TRI,
+ Register());
+ }
+
+ bool hasSpilled() const { return SpilledReg != AArch64::NoRegister; }
+
+ ~ScopedScavengeOrSpill() {
+ if (hasSpilled())
+ TII.loadRegFromStackSlot(MBB, MBBI, SpilledReg, SpillFI, &RC, &TRI,
+ Register());
+ }
+
+private:
+ MachineBasicBlock &MBB;
+ MachineBasicBlock::iterator MBBI;
+ const TargetRegisterClass &RC;
+ const AArch64InstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ Register SpilledReg = AArch64::NoRegister;
+ int SpillFI = -1;
+};
+
+/// Emergency stack slots (frame indices, unset until first needed) used when
+/// expanding SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+struct EmergencyStackSlots {
+ std::optional<int> ZPRSpillFI;
+ std::optional<int> PPRSpillFI;
+ std::optional<int> GPRSpillFI;
+};
+
+/// Expands:
+/// ```
+/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0
+/// ```
+/// To:
+/// ```
+/// $z0 = CPY_ZPzI_B $p0, 1, 0
+/// STR_ZXI $z0, %stack.0, 0
+/// ```
+/// While ensuring a ZPR ($z0 in this example) is free for the predicate
+/// (spilling if necessary).
+static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
+ MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs,
+ BitVector const &ZPRRegs,
+ EmergencyStackSlots &SpillSlots) {
+ MachineFunction &MF = *MBB.getParent();
+ auto *TII =
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ Register ZPredReg = AArch64::NoRegister;
+ ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI),
+ ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
+ UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
+
+#ifndef NDEBUG
+ bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy);
+ assert((!FindZPRReg.hasSpilled() || !InPrologueOrEpilogue) &&
+ "SPILL_PPR_TO_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
+ "or epilogue");
+#endif
+
+ SmallVector<MachineInstr *, 2> MachineInstrs;
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::CPY_ZPzI_B))
+ .addReg(ZPredReg, RegState::Define)
+ .add(MI.getOperand(0))
+ .addImm(1)
+ .addImm(0)
+ .getInstr());
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::STR_ZXI))
+ .addReg(ZPredReg)
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .setMemRefs(MI.memoperands())
+ .getInstr());
+ propagateFrameFlags(MI, MachineInstrs);
+}
+
+/// Expands:
+/// ```
+/// $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0
+/// ```
+/// To:
+/// ```
+/// $z0 = LDR_ZXI %stack.0, 0
+/// $p0 = PTRUE_B 31, implicit $vg
+/// $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+/// ```
+/// While ensuring a ZPR ($z0 in this example) is free (spilling if necessary).
+/// If the status flags are in use at the expansion point they are preserved
+/// via a GPR, which may cause an additional spill if no GPR is free.
+/// \returns true if the expansion had to spill a predicate register.
+static bool expandFillPPRFromZPRSlotPseudo(
+ MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs, BitVector const &ZPRRegs,
+ BitVector const &PPR3bRegs, BitVector const &GPRRegs,
+ EmergencyStackSlots &SpillSlots) {
+ MachineFunction &MF = *MBB.getParent();
+ auto *TII =
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ Register ZPredReg = AArch64::NoRegister;
+ ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI),
+ ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
+ UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
+
+ Register PredReg = AArch64::NoRegister;
+ std::optional<ScopedScavengeOrSpill> FindPPR3bReg;
+ if (AArch64::PPR_3bRegClass.contains(MI.getOperand(0).getReg()))
+ PredReg = MI.getOperand(0).getReg();
+ else
+ FindPPR3bReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), PredReg,
+ AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs,
+ PPR3bRegs, SpillSlots.PPRSpillFI);
+
+ // Elide NZCV spills if we know it is not used.
+ Register NZCVSaveReg = AArch64::NoRegister;
+ bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
+ std::optional<ScopedScavengeOrSpill> FindGPRReg;
+ if (IsNZCVUsed)
+ FindGPRReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg,
+ AArch64::X0, AArch64::GPR64RegClass, UsedRegs, GPRRegs,
+ SpillSlots.GPRSpillFI);
+
+#ifndef NDEBUG
+ bool Spilled = FindZPRReg.hasSpilled() ||
+ (FindPPR3bReg && FindPPR3bReg->hasSpilled()) ||
+ (FindGPRReg && FindGPRReg->hasSpilled());
+ bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy);
+ assert((!Spilled || !InPrologueOrEpilogue) &&
+ "FILL_PPR_FROM_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
+ "or epilogue");
+#endif
+
+ SmallVector<MachineInstr *, 4> MachineInstrs;
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI))
+ .addReg(ZPredReg, RegState::Define)
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .setMemRefs(MI.memoperands())
+ .getInstr());
+ if (IsNZCVUsed)
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MRS))
+ .addReg(NZCVSaveReg, RegState::Define)
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(AArch64::NZCV, RegState::Implicit)
+ .getInstr());
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
+ .addReg(PredReg, RegState::Define)
+ .addImm(31));
+ MachineInstrs.push_back(
+ BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
+ .addReg(MI.getOperand(0).getReg(), RegState::Define)
+ .addReg(PredReg)
+ .addReg(ZPredReg)
+ .addImm(0)
+ .addReg(AArch64::NZCV, RegState::ImplicitDefine)
+ .getInstr());
+ if (IsNZCVUsed)
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MSR))
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(NZCVSaveReg)
+ .addReg(AArch64::NZCV, RegState::ImplicitDefine)
+ .getInstr());
+
+ propagateFrameFlags(MI, MachineInstrs);
+ return FindPPR3bReg && FindPPR3bReg->hasSpilled();
+}
+
+/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
+/// operations within \p MBB. Returns true if any fill spilled a predicate.
+static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI,
+ BitVector const &ZPRRegs,
+ BitVector const &PPR3bRegs,
+ BitVector const &GPRRegs,
+ EmergencyStackSlots &SpillSlots) {
+ LiveRegUnits UsedRegs(TRI);
+ UsedRegs.addLiveOuts(MBB);
+ bool HasPPRSpills = false;
+ for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+ UsedRegs.stepBackward(MI);
+ switch (MI.getOpcode()) {
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+ HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(
+ MBB, MI, TRI, UsedRegs, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
+ MI.eraseFromParent();
+ break;
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, ZPRRegs,
+ SpillSlots);
+ MI.eraseFromParent();
+ break;
+ default:
+ break;
+ }
+ }
+
+ return HasPPRSpills;
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
+
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ const TargetSubtargetInfo &TSI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *TSI.getRegisterInfo();
+ if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) {
+ const uint32_t *CSRMask =
+ TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ assert(MFI.isCalleeSavedInfoValid());
+
+ auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
+ BitVector ScavengeableRegs =
+ TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
+ if (CSRMask)
+ ScavengeableRegs.clearBitsInMask(CSRMask);
+ // TODO: Allow reusing callee-saved registers that have been saved.
+ return ScavengeableRegs;
+ };
+
+ // If predicate spills are 16 bytes we may need to expand
+ // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+ // These are handled separately as we need to compute register liveness to
+ // scavenge a ZPR and PPR during the expansion.
+ BitVector ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
+ // Only p0-7 are possible as the second operand of cmpne (needed for fills).
+ BitVector PPR3bRegs =
+ ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
+ BitVector GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
+
+ bool SpillsAboveP7 =
+ any_of(MFI.getCalleeSavedInfo(), [](const CalleeSavedInfo &CSI) {
+ return AArch64::PPR_p8to15RegClass.contains(CSI.getReg());
+ });
+ // We spill p4 in determineCalleeSaves() if a predicate in p8-p15 is spilled,
+ // as it may be needed to reload callee saves (if p0-p3 are used as
+ // returns).
+ if (SpillsAboveP7)
+ PPR3bRegs.set(AArch64::P4);
+
+ EmergencyStackSlots SpillSlots;
+ for (MachineBasicBlock &MBB : MF) {
+ for (int Pass = 0; Pass < 2; Pass++) {
+ bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
+ MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
+ if (!HasPPRSpills)
+ break;
+ }
+ }
+ }
+
MachineFrameInfo &MFI = MF.getFrameInfo();
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
@@ -4166,7 +4489,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
int64_t SVEStackSize =
assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
@@ -5200,7 +5522,12 @@ void AArch64FrameLowering::emitRemarks(
unsigned RegTy = StackAccess::AccessType::GPR;
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
- if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+ // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
+ // spill/fill the predicate as a data vector (so are an FPR access).
+ if (!is_contained({AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO,
+ AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO},
+ MI.getOpcode()) &&
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
RegTy = StackAccess::PPR;
else
RegTy = StackAccess::FPR;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 17dd8a073eff0f..0f2b969fba35c7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -81,7 +81,7 @@ static cl::opt<unsigned>
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
AArch64::CATCHRET),
- RI(STI.getTargetTriple()), Subtarget(STI) {}
+ RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -2438,6 +2438,8 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STZ2Gi:
case AArch64::STZGi:
case AArch64::TAGPstack:
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
return 2;
case AArch64::LD1B_D_IMM:
case AArch64::LD1B_H_IMM:
@@ -4223,6 +4225,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -256;
MaxOffset = 254;
break;
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
@@ -5355,6 +5359,11 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected predicate store without SVE store instructions");
+ Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -5527,6 +5536,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected predicate load without SVE load instructions");
+ Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 5973b63b5a8024..e9730348ba58e5 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -38,8 +38,8 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
-AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
- : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode)
+ : AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) {
AArch64_MC::initLLVMToCVRegMapping(this);
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 11da624af4881b..898a509f75908f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -27,7 +27,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
const Triple &TT;
public:
- AArch64RegisterInfo(const Triple &TT);
+ AArch64RegisterInfo(const Triple &TT, unsigned HwMode);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index dd4f2549929f84..6b6884c5457589 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -979,10 +979,19 @@ class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
//******************************************************************************
// SVE predicate register classes.
+
+// Note: This hardware mode is enabled in AArch64Subtarget::getHwModeSet()
+// (without the use of the table-gen'd predicates).
+def SMEWithStreamingMemoryHazards : HwMode<"", [Predicate<"false">]>;
+
+def PPRSpillFillRI : RegInfoByHwMode<
+ [DefaultMode, SMEWithStreamingMemoryHazards],
+ [RegInfo<16,16,16>, RegInfo<16,128,128>]>;
+
class PPRClass<int firstreg, int lastreg, int step = 1> : RegisterClass<"AArch64",
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
(sequence "P%u", firstreg, lastreg, step)> {
- let Size = 16;
+ let RegInfos = PPRSpillFillRI;
}
def PPR : PPRClass<0, 15> {
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index bc921f07e1dbf8..5864f57582e21c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -86,6 +86,11 @@ static cl::alias AArch64StreamingStackHazardSize(
cl::desc("alias for -aarch64-streaming-hazard-size"),
cl::aliasopt(AArch64StreamingHazardSize));
+static cl::opt<bool> EnableZPRPredicateSpills(
+ "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));
+
// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
@@ -400,6 +405,23 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}
+unsigned AArch64Subtarget::getHwModeSet() const {
+ unsigned Modes = 0;
+
+ // Use a special hardware mode in streaming[-compatible] functions when
+ // -aarch64-enable-zpr-predicate-spills is set. This changes the spill size
+ // (and alignment) for the predicate register class.
+ //
+ // FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
+ // CPU features.
+ if (EnableZPRPredicateSpills.getValue() &&
+ (isStreaming() || isStreamingCompatible())) {
+ Modes |= (1 << 0);
+ }
+
+ return Modes;
+}
+
const CallLowering *AArch64Subtarget::getCallLowering() const {
return CallLoweringInfo.get();
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d22991224d496d..e7757907a66434 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -130,6 +130,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool IsStreaming = false, bool IsStreamingCompatible = false,
bool HasMinSize = false);
+ virtual unsigned getHwModeSet() const override;
+
// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool GETTER() const { return ATTRIBUTE; }
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 81004e70dc179b..e1b34dfc895262 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -59,6 +59,20 @@ def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
let hasPostISelHook = 1;
}
+def SPILL_PPR_TO_ZPR_SLOT_PSEUDO :
+ Pseudo<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]>
+{
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def FILL_PPR_FROM_ZPR_SLOT_PSEUDO :
+ Pseudo<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]>
+{
+ let mayLoad = 1;
+ let hasSideEffects = 0;
+}
+
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
[SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
new file mode 100644
index 00000000000000..a432a61384e42a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
@@ -0,0 +1,1035 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -run-pass=greedy %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
+--- |
+ source_filename = "<stdin>"
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64--linux-gnu"
+
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill() #0 { entry: unreachable }
+
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv() #0 { entry: unreachable }
+
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__spill_gpr() #0 { entry: unreachable }
+
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill__spill_zpr() #0 { entry: unreachable }
+
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill_above_p7() #0 { entry: unreachable }
+
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill_p4_saved() #0 { entry: unreachable }
+
+ attributes #0 = {nounwind "target-features"="+sme,+sve" "aarch64_pstate_sm_compatible"}
+...
+---
+name: zpr_predicate_spill
+tracksRegLiveness: true
+stack:
+liveins:
+ - { reg: '$p0' }
+body: |
+ bb.0.entry:
+ liveins: $p0
+
+ ; CHECK-LABEL: name: zpr_predicate_spill
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+ ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
+ ; CHECK: liveins: $p0
+ ; CHECK-NEXT: {{ $}}
+ ;
+ ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ;
+ ; CHECK-NEXT: $p0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p5 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p6 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p7 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p9 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p11 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p12 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p13 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0
+
+ ; EXPAND-LABEL: name: zpr_predicate_spill
+ ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
+ ; EXPAND-NEXT: {{ $}}
+ ;
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+ ;
+ ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0)
+ ;
+ ; EXPAND-NEXT: $p0 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p1 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p2 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p3 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p4 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p5 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p6 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p7 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p9 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p10 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p11 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p12 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p13 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p14 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: RET undef $lr, implicit $p0
+ %1:ppr = COPY $p0
+
+ $p0 = IMPLICIT_DEF
+ $p1 = IMPLICIT_DEF
+ $p2 = IMPLICIT_DEF
+ $p3 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $p0 = COPY %1
+
+ RET_ReallyLR implicit $p0
+...
+---
+name: zpr_predicate_spill__save_restore_nzcv
+tracksRegLiveness: true
+stack:
+liveins:
+ - { reg: '$p0' }
+body: |
+ bb.0.entry:
+ liveins: $p0
+
+ ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+ ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
+ ; CHECK: liveins: $p0
+ ; CHECK-NEXT: {{ $}}
+ ; NZCV is defined before the spill and read by the FAKE_USE after the fill, so the flags are live across the fill.
+ ; CHECK-NEXT: $nzcv = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; p0-p15 are all redefined between the spill and the fill.
+ ; CHECK-NEXT: $p0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p5 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p6 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p7 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p9 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p11 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p12 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p13 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
+ ;
+ ; CHECK-NEXT: FAKE_USE implicit $nzcv
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0
+
+ ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv
+ ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
+ ; EXPAND-NEXT: {{ $}}
+ ;
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+ ;
+ ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF
+ ;
+ ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0)
+ ;
+ ; EXPAND-NEXT: $p0 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p1 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p2 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p3 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p4 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p5 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p6 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p7 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p9 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p10 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p11 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p12 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p13 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p14 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p15 = IMPLICIT_DEF
+ ; Expanded fill: NZCV is copied into $x0 (MRS) before the CMPNE, which redefines NZCV, and written back (MSR) after it.
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv
+ ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv
+ ;
+ ; EXPAND-NEXT: FAKE_USE implicit $nzcv
+ ;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: RET undef $lr, implicit $p0
+ $nzcv = IMPLICIT_DEF
+
+ %1:ppr = COPY $p0
+
+ $p0 = IMPLICIT_DEF
+ $p1 = IMPLICIT_DEF
+ $p2 = IMPLICIT_DEF
+ $p3 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $p0 = COPY %1
+
+ FAKE_USE implicit $nzcv
+
+ RET_ReallyLR implicit $p0
+...
+---
+name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
+tracksRegLiveness: true
+stack:
+liveins:
+ - { reg: '$p0' }
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+ - { reg: '$x3' }
+ - { reg: '$x4' }
+ - { reg: '$x5' }
+ - { reg: '$x6' }
+ - { reg: '$x7' }
+body: |
+ bb.0.entry:
+ liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+
+ ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+ ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
+ ; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+ ; CHECK-NEXT: {{ $}}
+ ; NZCV and x8-x18 are defined here and x0-x7 are live-in; all of them are still used after the fill.
+ ; CHECK-NEXT: $nzcv = IMPLICIT_DEF
+ ; CHECK-NEXT: $x8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x9 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x11 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x12 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x13 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x15 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x16 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x17 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x18 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ;
+ ; CHECK-NEXT: $p0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p5 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p6 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p7 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p9 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p11 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p12 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p13 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
+ ;
+ ; CHECK-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+
+ ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
+ ; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
+ ; EXPAND-NEXT: {{ $}}
+ ;
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+ ;
+ ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x8 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x9 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x10 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x11 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x12 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x13 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x14 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x15 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x16 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x17 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $x18 = IMPLICIT_DEF
+ ;
+ ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0)
+ ;
+ ; EXPAND-NEXT: $p0 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p1 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p2 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p3 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p4 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p5 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p6 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p7 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p9 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p10 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p11 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p12 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p13 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p14 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p15 = IMPLICIT_DEF
+ ; Expanded fill: $x0 is stored to %stack.14 before it is used to hold NZCV (MRS/MSR), then reloaded afterwards.
+ ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg
+ ; EXPAND-NEXT: STRXui $x0, killed $fp, 1 :: (store (s64) into %stack.14)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv
+ ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv
+ ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg
+ ; EXPAND-NEXT: $x0 = LDRXui killed $fp, 1 :: (load (s64) from %stack.14)
+ ;
+ ; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+ $nzcv = IMPLICIT_DEF
+ $x8 = IMPLICIT_DEF
+ $x9 = IMPLICIT_DEF
+ $x10 = IMPLICIT_DEF
+ $x11 = IMPLICIT_DEF
+ $x12 = IMPLICIT_DEF
+ $x13 = IMPLICIT_DEF
+ $x14 = IMPLICIT_DEF
+ $x15 = IMPLICIT_DEF
+ $x16 = IMPLICIT_DEF
+ $x17 = IMPLICIT_DEF
+ $x18 = IMPLICIT_DEF
+
+ %1:ppr = COPY $p0
+
+ $p0 = IMPLICIT_DEF
+ $p1 = IMPLICIT_DEF
+ $p2 = IMPLICIT_DEF
+ $p3 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $p0 = COPY %1
+
+ FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+
+ RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+...
+---
+name: zpr_predicate_spill__spill_zpr
+tracksRegLiveness: true
+stack:
+liveins:
+ - { reg: '$p0' }
+ - { reg: '$z0' }
+ - { reg: '$z1' }
+ - { reg: '$z2' }
+ - { reg: '$z3' }
+ - { reg: '$z4' }
+ - { reg: '$z5' }
+ - { reg: '$z6' }
+ - { reg: '$z7' }
+body: |
+ bb.0.entry:
+ liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
+
+ ; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+ ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
+ ; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
+ ; CHECK-NEXT: {{ $}}
+ ; z0-z7 are live-in and z16-z31 are defined here; all of them are still used after the fill.
+ ; CHECK-NEXT: $z16 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z17 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z18 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z19 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z20 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z21 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z22 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z23 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z24 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z25 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z26 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z27 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z28 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z29 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z30 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z31 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ;
+ ; CHECK-NEXT: $p0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p5 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p6 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p7 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p9 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p11 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p12 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p13 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
+ ;
+ ; CHECK-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7
+
+ ; EXPAND-LABEL: name: zpr_predicate_spill__spill_zpr
+ ; EXPAND: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4, $z23, $z22, $z21, $z20, $z19, $z18, $z17, $z16
+ ; EXPAND-NEXT: {{ $}}
+ ;
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.21)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -20, implicit $vg
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.20)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.19)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.18)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.17)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.16)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.15)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.14)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.13)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+ ;
+ ; EXPAND-NEXT: $z16 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z17 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z18 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z19 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z20 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z21 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z22 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z23 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z24 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z25 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z26 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z27 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z28 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z29 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z30 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $z31 = IMPLICIT_DEF
+ ; Expanded spill: $z0 is saved to %stack.22 while it carries the predicate value, then reloaded.
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22)
+ ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22)
+ ;
+ ; EXPAND-NEXT: $p0 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p1 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p2 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p3 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p4 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p5 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p6 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p7 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p9 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p10 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p11 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p12 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p13 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p14 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p15 = IMPLICIT_DEF
+ ; Expanded fill: $z0 is saved to %stack.22, used to load the spilled predicate data, then restored from %stack.22.
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22)
+ ;
+ ; EXPAND-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+ ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.20)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.19)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.18)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.17)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.16)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.15)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.14)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.13)
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.21)
+ ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7
+ $z16 = IMPLICIT_DEF
+ $z17 = IMPLICIT_DEF
+ $z18 = IMPLICIT_DEF
+ $z19 = IMPLICIT_DEF
+ $z20 = IMPLICIT_DEF
+ $z21 = IMPLICIT_DEF
+ $z22 = IMPLICIT_DEF
+ $z23 = IMPLICIT_DEF
+ $z24 = IMPLICIT_DEF
+ $z25 = IMPLICIT_DEF
+ $z26 = IMPLICIT_DEF
+ $z27 = IMPLICIT_DEF
+ $z28 = IMPLICIT_DEF
+ $z29 = IMPLICIT_DEF
+ $z30 = IMPLICIT_DEF
+ $z31 = IMPLICIT_DEF
+
+ %1:ppr = COPY $p0
+
+ $p0 = IMPLICIT_DEF
+ $p1 = IMPLICIT_DEF
+ $p2 = IMPLICIT_DEF
+ $p3 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $p0 = COPY %1
+
+ FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31
+
+ RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7
+...
+---
+name: zpr_predicate_spill_above_p7
+tracksRegLiveness: true
+stack:
+liveins:
+ - { reg: '$p0' }
+ - { reg: '$p1' }
+ - { reg: '$p2' }
+ - { reg: '$p3' }
+body: |
+ bb.0.entry:
+ liveins: $p0, $p1, $p2, $p3
+
+ ; CHECK-LABEL: name: zpr_predicate_spill_above_p7
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
+ ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
+ ; CHECK: liveins: $p0, $p1, $p2, $p3
+ ; CHECK-NEXT: {{ $}}
+ ;
+ ; CHECK-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p15, %stack.0, 0 :: (store (s128) into %stack.0)
+ ;
+ ; CHECK-NEXT: $p0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p5 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p6 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p7 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p9 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p11 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p12 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p13 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: $p15 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
+ ;
+ ; CHECK-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+
+ ; EXPAND-LABEL: name: zpr_predicate_spill_above_p7
+ ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
+ ; EXPAND-NEXT: {{ $}}
+ ;
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+ ;
+ ; EXPAND-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p15, 1, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
+ ;
+ ; EXPAND-NEXT: $p0 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p1 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p2 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p3 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p4 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p5 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p6 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p7 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p9 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p10 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p11 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p12 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p13 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p14 = IMPLICIT_DEF
+ ; EXPAND-NEXT: $p15 = IMPLICIT_DEF
+ ;
+ ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.14)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p15 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.14)
+ ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ;
+ ; EXPAND-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
+ $p15 = IMPLICIT_DEF
+ %1:ppr = COPY $p15
+
+ $p0 = IMPLICIT_DEF
+ $p1 = IMPLICIT_DEF
+ $p2 = IMPLICIT_DEF
+ $p3 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $p15 = COPY %1
+
+ FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7
+
+ RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+...
+---
+name: zpr_predicate_spill_p4_saved
+tracksRegLiveness: true
+stack:
+liveins:
+ - { reg: '$p0' }
+ - { reg: '$p1' }
+ - { reg: '$p2' }
+ - { reg: '$p3' }
+body: |
+ bb.0.entry:
+ liveins: $p0, $p1, $p2, $p3
+
+ ; CHECK-LABEL: name: zpr_predicate_spill_p4_saved
+ ; CHECK: liveins: $p0, $p1, $p2, $p3
+ ; CHECK-NEXT: {{ $}}
+ ;
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ;
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+
+ ; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved
+ ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4
+ ; EXPAND-NEXT: {{ $}}
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
+ ;
+ ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+ ;
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
+ ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
+
+ ; If we spill a register in the range p8-p15, p4 must also be saved, so we can
+ ; guarantee there will be a register in the range p0-p7 for the cmpne reload.
+ $p8 = IMPLICIT_DEF
+
+ RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+...
diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
index 0b6bf3892a0c2b..c67d91952c6188 100644
--- a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PADDING
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-enable-zpr-predicate-spills -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ZPR-PRED-SPILLS
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-enable-zpr-predicate-spills -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ZPR-PRED-SPILLS-WITH-PADDING
; Don't emit remarks for non-streaming functions.
define float @csr_x20_stackargs_notsc(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) {
@@ -66,13 +68,18 @@ entry:
}
; SVE calling conventions
-; Predicate register spills end up in FP region, currently.
+; Predicate register spills end up in FP region, currently. This can be
+; mitigated with the -aarch64-enable-zpr-predicate-spills option.
define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call':
+; CHECK-ZPR-PRED-SPILLS-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at {{.*}} is too close to GPR stack object
entry:
tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
%call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
@@ -84,6 +91,10 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8
; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call':
+; CHECK-ZPR-PRED-SPILLS-NOT: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at {{.*}} is too close to GPR stack object
entry:
tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
%0 = alloca [37 x i8], align 16
>From 2a74bc6aac8d413b2d305a634a314da2aefd0ee1 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 23 Jan 2025 16:45:52 +0000
Subject: [PATCH 2/5] Fixups
---
.../Target/AArch64/AArch64FrameLowering.cpp | 27 +-
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 +-
.../AArch64/spill-fill-zpr-predicates.mir | 388 ++++++++++--------
3 files changed, 223 insertions(+), 194 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 64c3ecaf21ea31..a2eacc69aee71b 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4332,13 +4332,9 @@ static bool expandFillPPRFromZPRSlotPseudo(
UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
Register PredReg = AArch64::NoRegister;
- std::optional<ScopedScavengeOrSpill> FindPPR3bReg;
- if (AArch64::PPR_3bRegClass.contains(MI.getOperand(0).getReg()))
- PredReg = MI.getOperand(0).getReg();
- else
- FindPPR3bReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), PredReg,
- AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs,
- PPR3bRegs, SpillSlots.PPRSpillFI);
+ ScopedScavengeOrSpill FindPPR3bReg(
+ MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0,
+ AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI);
// Elide NZCV spills if we know it is not used.
Register NZCVSaveReg = AArch64::NoRegister;
@@ -4350,8 +4346,7 @@ static bool expandFillPPRFromZPRSlotPseudo(
SpillSlots.GPRSpillFI);
#ifndef NDEBUG
- bool Spilled = FindZPRReg.hasSpilled() ||
- (FindPPR3bReg && FindPPR3bReg->hasSpilled()) ||
+ bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() ||
(FindGPRReg && FindGPRReg->hasSpilled());
bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy);
@@ -4393,7 +4388,7 @@ static bool expandFillPPRFromZPRSlotPseudo(
.getInstr());
propagateFrameFlags(MI, MachineInstrs);
- return FindPPR3bReg && FindPPR3bReg->hasSpilled();
+ return FindPPR3bReg.hasSpilled();
}
/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
@@ -4446,6 +4441,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
if (CSRMask)
ScavengeableRegs.clearBitsInMask(CSRMask);
// TODO: Allow reusing callee-saved registers that have been saved.
+ assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers");
return ScavengeableRegs;
};
@@ -4471,9 +4467,15 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
EmergencyStackSlots SpillSlots;
for (MachineBasicBlock &MBB : MF) {
+ // In the case we had to spill a predicate (in the range p0-p7) to reload
+ // a predicate (>= p8), additional spill/fill pseudos will be created.
+ // These need an additional expansion pass. Note: There will only be at
+ // most two expansion passes, as spilling/filling a predicate in the range
+ // p0-p7 never requires spilling another predicate.
for (int Pass = 0; Pass < 2; Pass++) {
bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
+ assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
if (!HasPPRSpills)
break;
}
@@ -5524,9 +5526,8 @@ void AArch64FrameLowering::emitRemarks(
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
// spill/fill the predicate as a data vector (so are an FPR acess).
- if (!is_contained({AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO,
- AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO},
- MI.getOpcode()) &&
+ if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
+ MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
RegTy = StackAccess::PPR;
else
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 5864f57582e21c..34d05c6457e057 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const {
//
// FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
// CPU features.
- if (EnableZPRPredicateSpills.getValue() &&
+ if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 &&
(isStreaming() || isStreamingCompatible())) {
Modes |= (1 << 0);
}
diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
index a432a61384e42a..8aa957f04efc07 100644
--- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
@@ -31,7 +31,6 @@ body: |
liveins: $p0
; CHECK-LABEL: name: zpr_predicate_spill
- ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0
@@ -57,42 +56,46 @@ body: |
; CHECK-NEXT: $p15 = IMPLICIT_DEF
;
; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
+ ;
; CHECK-NEXT: RET_ReallyLR implicit $p0
; EXPAND-LABEL: name: zpr_predicate_spill
; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
; EXPAND-NEXT: {{ $}}
;
- ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
;
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.0)
;
; EXPAND-NEXT: $p0 = IMPLICIT_DEF
; EXPAND-NEXT: $p1 = IMPLICIT_DEF
@@ -111,49 +114,51 @@ body: |
; EXPAND-NEXT: $p14 = IMPLICIT_DEF
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0)
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
- ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: RET undef $lr, implicit $p0
%1:ppr = COPY $p0
@@ -189,7 +194,6 @@ body: |
liveins: $p0
; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv
- ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0
@@ -219,44 +223,48 @@ body: |
; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
;
; CHECK-NEXT: FAKE_USE implicit $nzcv
+ ;
; CHECK-NEXT: RET_ReallyLR implicit $p0
; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv
; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
; EXPAND-NEXT: {{ $}}
;
- ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
;
; EXPAND-NEXT: $nzcv = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.0)
;
; EXPAND-NEXT: $p0 = IMPLICIT_DEF
; EXPAND-NEXT: $p1 = IMPLICIT_DEF
@@ -275,7 +283,7 @@ body: |
; EXPAND-NEXT: $p14 = IMPLICIT_DEF
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0)
; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
@@ -283,45 +291,47 @@ body: |
;
; EXPAND-NEXT: FAKE_USE implicit $nzcv
;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
- ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: RET undef $lr, implicit $p0
$nzcv = IMPLICIT_DEF
@@ -369,7 +379,6 @@ body: |
liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
- ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
@@ -410,38 +419,41 @@ body: |
; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
;
; CHECK-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+ ;
; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
; EXPAND-NEXT: {{ $}}
;
- ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
;
; EXPAND-NEXT: $nzcv = IMPLICIT_DEF
@@ -458,7 +470,8 @@ body: |
; EXPAND-NEXT: $x18 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: $fp = ADDXri $sp, 1040, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $fp, 0 :: (store (s128) into %stack.0)
;
; EXPAND-NEXT: $p0 = IMPLICIT_DEF
; EXPAND-NEXT: $p1 = IMPLICIT_DEF
@@ -477,56 +490,57 @@ body: |
; EXPAND-NEXT: $p14 = IMPLICIT_DEF
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg
- ; EXPAND-NEXT: STRXui $x0, killed $fp, 1 :: (store (s64) into %stack.14)
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: STRXui $x0, $sp, 1 :: (store (s64) into %stack.16)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI killed $fp, 0 :: (load (s128) from %stack.0)
; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv
- ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg
- ; EXPAND-NEXT: $x0 = LDRXui killed $fp, 1 :: (load (s64) from %stack.14)
+ ; EXPAND-NEXT: $x0 = LDRXui $sp, 1 :: (load (s64) from %stack.16)
;
; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
+ ;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
- ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
$nzcv = IMPLICIT_DEF
$x8 = IMPLICIT_DEF
@@ -585,7 +599,6 @@ body: |
liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr
- ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
@@ -630,46 +643,49 @@ body: |
; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
;
; CHECK-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31
+ ;
; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7
; EXPAND-LABEL: name: zpr_predicate_spill__spill_zpr
; EXPAND: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4, $z23, $z22, $z21, $z20, $z19, $z18, $z17, $z16
; EXPAND-NEXT: {{ $}}
;
- ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.21)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.22)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -20, implicit $vg
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.20)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.21)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.19)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.20)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.18)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.19)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.17)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.18)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.16)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.17)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.15)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.16)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.14)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.15)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.13)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.14)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.13)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.12)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.11)
; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.9)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.8)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.7)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.6)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.5)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.4)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.3)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.2)
- ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
;
; EXPAND-NEXT: $z16 = IMPLICIT_DEF
@@ -689,10 +705,11 @@ body: |
; EXPAND-NEXT: $z30 = IMPLICIT_DEF
; EXPAND-NEXT: $z31 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22)
+ ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.24)
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22)
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 1 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 0 :: (load (s128) from %stack.24)
;
; EXPAND-NEXT: $p0 = IMPLICIT_DEF
; EXPAND-NEXT: $p1 = IMPLICIT_DEF
@@ -711,60 +728,63 @@ body: |
; EXPAND-NEXT: $p14 = IMPLICIT_DEF
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22)
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.24)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 1 :: (load (s128) from %stack.0)
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.24)
;
; EXPAND-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31
+ ;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
- ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.1)
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.20)
+ ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.21)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.19)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.20)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.18)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.17)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.16)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.17)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.15)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.16)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.14)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.15)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.13)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.14)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13)
; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12)
; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11)
; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg
- ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.21)
+ ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7
$z16 = IMPLICIT_DEF
$z17 = IMPLICIT_DEF
@@ -822,7 +842,6 @@ body: |
liveins: $p0, $p1, $p2, $p3
; CHECK-LABEL: name: zpr_predicate_spill_above_p7
- ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0, $p1, $p2, $p3
@@ -852,44 +871,48 @@ body: |
; CHECK-NEXT: $p15 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0)
;
; CHECK-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7
+ ;
; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
; EXPAND-LABEL: name: zpr_predicate_spill_above_p7
; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
; EXPAND-NEXT: {{ $}}
;
- ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
;
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p15, 1, 0
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 1 :: (store (s128) into %stack.0)
;
; EXPAND-NEXT: $p0 = IMPLICIT_DEF
; EXPAND-NEXT: $p1 = IMPLICIT_DEF
@@ -909,54 +932,57 @@ body: |
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
- ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.14)
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.16)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 1 :: (load (s128) from %stack.0)
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.14)
+ ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.16)
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
;
; EXPAND-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7
+ ;
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
- ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13)
+ ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
$p15 = IMPLICIT_DEF
%1:ppr = COPY $p15
@@ -1000,31 +1026,33 @@ body: |
; CHECK-LABEL: name: zpr_predicate_spill_p4_saved
; CHECK: liveins: $p0, $p1, $p2, $p3
; CHECK-NEXT: {{ $}}
- ;
; CHECK-NEXT: $p8 = IMPLICIT_DEF
- ;
; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
-
+ ;
; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved
; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4
; EXPAND-NEXT: {{ $}}
- ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.3)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.2)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.1)
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
;
; EXPAND-NEXT: $p8 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.2)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
- ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
+ ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.3)
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
; If we spill a register above p8, p4 must also be saved, so we can guarantee
>From e522bb68cc0a96ff491e50cfcc92f402efa6b2bf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 24 Jan 2025 17:16:14 +0000
Subject: [PATCH 3/5] Fixups
---
.../Target/AArch64/AArch64FrameLowering.cpp | 168 ++++++++++--------
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 +-
.../AArch64/spill-fill-zpr-predicates.mir | 111 ++++++------
3 files changed, 149 insertions(+), 132 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a2eacc69aee71b..5bbf07b607bc30 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4208,20 +4208,22 @@ struct ScopedScavengeOrSpill {
Register SpillCandidate, const TargetRegisterClass &RC,
LiveRegUnits const &UsedRegs,
BitVector const &AllocatableRegs,
- std::optional<int> &MaybeSpillFI)
+ std::optional<int> *MaybeSpillFI)
: MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
*MF.getSubtarget().getInstrInfo())),
TRI(*MF.getSubtarget().getRegisterInfo()) {
FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
if (FreeReg != AArch64::NoRegister)
return;
- if (!MaybeSpillFI) {
+ assert(MaybeSpillFI && "Expected emergency spill slot FI information "
+ "(attempted to spill in prologue/epilogue?)");
+ if (!MaybeSpillFI->has_value()) {
MachineFrameInfo &MFI = MF.getFrameInfo();
- MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
- TRI.getSpillAlign(RC));
+ *MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlign(RC));
}
FreeReg = SpilledReg = SpillCandidate;
- SpillFI = *MaybeSpillFI;
+ SpillFI = MaybeSpillFI->value();
TII.storeRegToStackSlot(MBB, MBBI, SpilledReg, false, SpillFI, &RC, &TRI,
Register());
}
@@ -4252,6 +4254,18 @@ struct EmergencyStackSlots {
std::optional<int> GPRSpillFI;
};
+/// Registers available for scavenging (ZPR, PPR3b, GPR).
+struct ScavengeableRegs {
+ BitVector ZPRRegs;
+ BitVector PPR3bRegs;
+ BitVector GPRRegs;
+};
+
+static bool isInPrologueOrEpilogue(const MachineInstr &MI) {
+ return MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy);
+}
+
/// Expands:
/// ```
/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0
@@ -4267,24 +4281,17 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
MachineInstr &MI,
const TargetRegisterInfo &TRI,
LiveRegUnits const &UsedRegs,
- BitVector const &ZPRRegs,
+ ScavengeableRegs const &Regs,
EmergencyStackSlots &SpillSlots) {
MachineFunction &MF = *MBB.getParent();
auto *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
Register ZPredReg = AArch64::NoRegister;
- ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI),
- ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
- UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
-
-#ifndef NDEBUG
- bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
- MI.getFlag(MachineInstr::FrameDestroy);
- assert((!FindZPRReg.hasSpilled() || !InPrologueOrEpilogue) &&
- "SPILL_PPR_TO_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
- "or epilogue");
-#endif
+ ScopedScavengeOrSpill FindZPRReg(
+ MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0,
+ AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
SmallVector<MachineInstr *, 2> MachineInstrs;
const DebugLoc &DL = MI.getDebugLoc();
@@ -4317,44 +4324,37 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
/// spilling if necessary). If the status flags are in use at the point of
/// expansion they are preserved (by moving them to/from a GPR). This may cause
/// an additional spill if no GPR is free at the expansion point.
-static bool expandFillPPRFromZPRSlotPseudo(
- MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
- LiveRegUnits const &UsedRegs, BitVector const &ZPRRegs,
- BitVector const &PPR3bRegs, BitVector const &GPRRegs,
- EmergencyStackSlots &SpillSlots) {
+static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
+ MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs,
+ ScavengeableRegs const &Regs,
+ EmergencyStackSlots &SpillSlots) {
MachineFunction &MF = *MBB.getParent();
auto *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
Register ZPredReg = AArch64::NoRegister;
- ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI),
- ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
- UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
+ ScopedScavengeOrSpill FindZPRReg(
+ MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0,
+ AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
Register PredReg = AArch64::NoRegister;
ScopedScavengeOrSpill FindPPR3bReg(
MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0,
- AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI);
+ AArch64::PPR_3bRegClass, UsedRegs, Regs.PPR3bRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);
// Elide NZCV spills if we know it is not used.
Register NZCVSaveReg = AArch64::NoRegister;
bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
std::optional<ScopedScavengeOrSpill> FindGPRReg;
if (IsNZCVUsed)
- FindGPRReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg,
- AArch64::X0, AArch64::GPR64RegClass, UsedRegs, GPRRegs,
- SpillSlots.GPRSpillFI);
-
-#ifndef NDEBUG
- bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() ||
- (FindGPRReg && FindGPRReg->hasSpilled());
- bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
- MI.getFlag(MachineInstr::FrameDestroy);
- assert((!Spilled || !InPrologueOrEpilogue) &&
- "FILL_PPR_FROM_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
- "or epilogue");
-#endif
-
+ FindGPRReg.emplace(
+ MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, AArch64::X0,
+ AArch64::GPR64RegClass, UsedRegs, Regs.GPRRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI);
SmallVector<MachineInstr *, 4> MachineInstrs;
const DebugLoc &DL = MI.getDebugLoc();
MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI))
@@ -4393,26 +4393,27 @@ static bool expandFillPPRFromZPRSlotPseudo(
/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
/// operations within the MachineBasicBlock \p MBB.
-static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
- const TargetRegisterInfo &TRI,
- BitVector const &ZPRRegs,
- BitVector const &PPR3bRegs,
- BitVector const &GPRRegs,
- EmergencyStackSlots &SpillSlots) {
+static bool expandSMEPPRToZPRSpillPseudos(
+ MachineBasicBlock &MBB, const TargetRegisterInfo &TRI,
+ ScavengeableRegs const &ScavengeableRegsBody,
+ ScavengeableRegs const &ScavengeableRegsFrameSetup,
+ EmergencyStackSlots &SpillSlots) {
LiveRegUnits UsedRegs(TRI);
UsedRegs.addLiveOuts(MBB);
bool HasPPRSpills = false;
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
UsedRegs.stepBackward(MI);
+ ScavengeableRegs const &Regs = isInPrologueOrEpilogue(MI)
+ ? ScavengeableRegsFrameSetup
+ : ScavengeableRegsBody;
switch (MI.getOpcode()) {
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
- HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(
- MBB, MI, TRI, UsedRegs, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
+ HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs,
+ Regs, SpillSlots);
MI.eraseFromParent();
break;
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
- expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, ZPRRegs,
- SpillSlots);
+ expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, Regs, SpillSlots);
MI.eraseFromParent();
break;
default:
@@ -4430,40 +4431,47 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
const TargetSubtargetInfo &TSI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *TSI.getRegisterInfo();
if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) {
- const uint32_t *CSRMask =
- TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ // If predicate spills are 16 bytes we may need to expand
+ // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+
const MachineFrameInfo &MFI = MF.getFrameInfo();
assert(MFI.isCalleeSavedInfoValid());
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
- BitVector ScavengeableRegs =
- TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
- if (CSRMask)
- ScavengeableRegs.clearBitsInMask(CSRMask);
- // TODO: Allow reusing callee-saved registers that have been saved.
- assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers");
- return ScavengeableRegs;
+ BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
+
+ for (const CalleeSavedInfo &I : CSI)
+ if (TRI.getRegClass(RegClassID)->contains(I.getReg()))
+ Regs.set(I.getReg());
+
+ assert(Regs.count() > 0 && "Expected scavengeable registers");
+ return Regs;
};
- // If predicates spills are 16-bytes we may need to expand
- // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
- // These are handled separately as we need to compute register liveness to
- // scavenge a ZPR and PPR during the expansion.
- BitVector ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
+ const uint32_t *CSRMask =
+ TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+
+ // Registers free to scavenge in the function body.
+ ScavengeableRegs ScavengeableRegsBody;
+ ScavengeableRegsBody.ZPRRegs =
+ ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
// Only p0-7 are possible as the second operand of cmpne (needed for fills).
- BitVector PPR3bRegs =
+ ScavengeableRegsBody.PPR3bRegs =
ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
- BitVector GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
-
- bool SpillsAboveP7 =
- any_of(MFI.getCalleeSavedInfo(), [](const CalleeSavedInfo &CSI) {
- return AArch64::PPR_p8to15RegClass.contains(CSI.getReg());
- });
- // We spill p4 in determineCalleeSaves() if a predicate above p8 is spilled,
- // as it may be needed to reload callee saves (if p0-p3 are used as
- // returns).
- if (SpillsAboveP7)
- PPR3bRegs.set(AArch64::P4);
+ ScavengeableRegsBody.GPRRegs =
+ ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
+
+ // Registers free to scavenge in the prologue/epilogue.
+ ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody;
+ ScavengeableRegsFrameSetup.ZPRRegs.clearBitsInMask(CSRMask);
+ ScavengeableRegsFrameSetup.GPRRegs.clearBitsInMask(CSRMask);
+ // Note: If p4 was available allow it to be scavenged (even though it is a
+ // CSR). P4 is reloaded last in the epilogue and is needed to reload
+ // predicates >= p8 if p0-p3 are used as return values.
+ ScavengeableRegsFrameSetup.PPR3bRegs.clearBitsInMask(CSRMask);
+ if (ScavengeableRegsBody.PPR3bRegs[AArch64::P4])
+ ScavengeableRegsFrameSetup.PPR3bRegs.set(AArch64::P4);
EmergencyStackSlots SpillSlots;
for (MachineBasicBlock &MBB : MF) {
@@ -4474,7 +4482,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// p0-p7 never requires spilling another predicate.
for (int Pass = 0; Pass < 2; Pass++) {
bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
- MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
+ MBB, TRI, ScavengeableRegsBody, ScavengeableRegsFrameSetup,
+ SpillSlots);
assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
if (!HasPPRSpills)
break;
@@ -5528,9 +5537,10 @@ void AArch64FrameLowering::emitRemarks(
// spill/fill the predicate as a data vector (so are an FPR acess).
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
- AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) {
+ MI.dump();
RegTy = StackAccess::PPR;
- else
+ } else
RegTy = StackAccess::FPR;
} else if (AArch64InstrInfo::isFpOrNEON(MI)) {
RegTy = StackAccess::FPR;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 34d05c6457e057..5864f57582e21c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const {
//
// FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
// CPU features.
- if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 &&
+ if (EnableZPRPredicateSpills.getValue() &&
(isStreaming() || isStreamingCompatible())) {
Modes |= (1 << 0);
}
diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
index 8aa957f04efc07..b58f91ac68a932 100644
--- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
@@ -10,7 +10,7 @@
define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv() #0 { entry: unreachable }
- define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__spill_gpr() #0 { entry: unreachable }
+ define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @zpr_predicate_spill__spill_zpr() #0 { entry: unreachable }
@@ -31,6 +31,7 @@ body: |
liveins: $p0
; CHECK-LABEL: name: zpr_predicate_spill
+ ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0
@@ -145,17 +146,17 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
@@ -194,6 +195,7 @@ body: |
liveins: $p0
; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv
+ ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0
@@ -284,10 +286,10 @@ body: |
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0)
- ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv
+ ; EXPAND-NEXT: $fp = MRS 55824, implicit-def $nzcv, implicit $nzcv
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv
+ ; EXPAND-NEXT: MSR 55824, $fp, implicit-def $nzcv
;
; EXPAND-NEXT: FAKE_USE implicit $nzcv
;
@@ -318,17 +320,17 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
@@ -361,7 +363,7 @@ body: |
RET_ReallyLR implicit $p0
...
---
-name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
+name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr
tracksRegLiveness: true
stack:
liveins:
@@ -378,13 +380,15 @@ body: |
bb.0.entry:
liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
- ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
+ ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr
+ ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
; CHECK-NEXT: {{ $}}
;
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
+ ;
; CHECK-NEXT: $x8 = IMPLICIT_DEF
; CHECK-NEXT: $x9 = IMPLICIT_DEF
; CHECK-NEXT: $x10 = IMPLICIT_DEF
@@ -422,7 +426,7 @@ body: |
;
; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
- ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr
+ ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr
; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4
; EXPAND-NEXT: {{ $}}
;
@@ -453,10 +457,11 @@ body: |
; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2)
- ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+ ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
;
; EXPAND-NEXT: $nzcv = IMPLICIT_DEF
+ ;
; EXPAND-NEXT: $x8 = IMPLICIT_DEF
; EXPAND-NEXT: $x9 = IMPLICIT_DEF
; EXPAND-NEXT: $x10 = IMPLICIT_DEF
@@ -470,7 +475,7 @@ body: |
; EXPAND-NEXT: $x18 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0
- ; EXPAND-NEXT: $fp = ADDXri $sp, 1040, 0
+ ; EXPAND-NEXT: $fp = ADDXri $sp, 1024, 0
; EXPAND-NEXT: STR_ZXI $z0, $fp, 0 :: (store (s128) into %stack.0)
;
; EXPAND-NEXT: $p0 = IMPLICIT_DEF
@@ -490,17 +495,15 @@ body: |
; EXPAND-NEXT: $p14 = IMPLICIT_DEF
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: STRXui $x0, $sp, 1 :: (store (s64) into %stack.16)
; EXPAND-NEXT: $z0 = LDR_ZXI killed $fp, 0 :: (load (s128) from %stack.0)
- ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv
+ ; EXPAND-NEXT: $fp = MRS 55824, implicit-def $nzcv, implicit $nzcv
; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv
- ; EXPAND-NEXT: $x0 = LDRXui $sp, 1 :: (load (s64) from %stack.16)
+ ; EXPAND-NEXT: MSR 55824, $fp, implicit-def $nzcv
;
; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18
;
- ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+ ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
@@ -527,17 +530,17 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
@@ -599,6 +602,7 @@ body: |
liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr
+ ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
@@ -771,17 +775,17 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13)
- ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22)
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
@@ -842,6 +846,7 @@ body: |
liveins: $p0, $p1, $p2, $p3
; CHECK-LABEL: name: zpr_predicate_spill_above_p7
+ ; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16,
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register:
; CHECK: liveins: $p0, $p1, $p2, $p3
@@ -969,14 +974,14 @@ body: |
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
@@ -1026,9 +1031,11 @@ body: |
; CHECK-LABEL: name: zpr_predicate_spill_p4_saved
; CHECK: liveins: $p0, $p1, $p2, $p3
; CHECK-NEXT: {{ $}}
+ ;
; CHECK-NEXT: $p8 = IMPLICIT_DEF
- ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
;
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+
; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved
; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4
; EXPAND-NEXT: {{ $}}
>From d28c70e4fe6a37a07860291a9f4d708cf09bb69d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 24 Jan 2025 17:48:52 +0000
Subject: [PATCH 4/5] Fixups
---
.../Target/AArch64/AArch64FrameLowering.cpp | 55 +++++++++++--------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 5bbf07b607bc30..d1e2d5ab67c4cd 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4434,42 +4434,49 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// If predicates spills are 16-bytes we may need to expand
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- assert(MFI.isCalleeSavedInfoValid());
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ const uint32_t *CSRMask =
+ TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
-
- for (const CalleeSavedInfo &I : CSI)
- if (TRI.getRegClass(RegClassID)->contains(I.getReg()))
- Regs.set(I.getReg());
-
+ Regs.clearBitsInMask(CSRMask);
assert(Regs.count() > 0 && "Expected scavengeable registers");
return Regs;
};
- const uint32_t *CSRMask =
- TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
-
- // Registers free to scavenge in the function body.
- ScavengeableRegs ScavengeableRegsBody;
- ScavengeableRegsBody.ZPRRegs =
+ // Registers free to scavenge in the prologue/epilogue.
+ ScavengeableRegs ScavengeableRegsFrameSetup;
+ ScavengeableRegsFrameSetup.ZPRRegs =
ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
// Only p0-7 are possible as the second operand of cmpne (needed for fills).
- ScavengeableRegsBody.PPR3bRegs =
+ ScavengeableRegsFrameSetup.PPR3bRegs =
ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
- ScavengeableRegsBody.GPRRegs =
+ ScavengeableRegsFrameSetup.GPRRegs =
ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
- // Registers free to scavenge in the prologue/epilogue.
- ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody;
- ScavengeableRegsFrameSetup.ZPRRegs.clearBitsInMask(CSRMask);
- ScavengeableRegsFrameSetup.GPRRegs.clearBitsInMask(CSRMask);
- // Note: If p4 was available allow it to be scavenged (even though it is a
- // CSR). P4 is reloaded last in the epilogue and is needed to reload
- // predicates >= p8 if p0-p3 are used as return values.
- ScavengeableRegsFrameSetup.PPR3bRegs.clearBitsInMask(CSRMask);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ assert(MFI.isCalleeSavedInfoValid());
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ auto MarkSavedRegistersAsAvailable =
+ [&, &Reserved = MF.getRegInfo().getReservedRegs()](
+ BitVector &Regs, unsigned RegClassID) {
+ for (const CalleeSavedInfo &I : CSI)
+ if (!Reserved[I.getReg()] &&
+ TRI.getRegClass(RegClassID)->contains(I.getReg()))
+ Regs.set(I.getReg());
+ };
+
+ // Registers free to scavenge in the function body.
+ ScavengeableRegs ScavengeableRegsBody = ScavengeableRegsFrameSetup;
+ MarkSavedRegistersAsAvailable(ScavengeableRegsBody.ZPRRegs,
+ AArch64::ZPRRegClassID);
+ MarkSavedRegistersAsAvailable(ScavengeableRegsBody.PPR3bRegs,
+ AArch64::PPR_3bRegClassID);
+ MarkSavedRegistersAsAvailable(ScavengeableRegsBody.GPRRegs,
+ AArch64::GPR64RegClassID);
+
+ // p4 (CSR) is reloaded last in the epilogue, so if it is saved, it can be
+ // used to reload other predicates.
if (ScavengeableRegsBody.PPR3bRegs[AArch64::P4])
ScavengeableRegsFrameSetup.PPR3bRegs.set(AArch64::P4);
>From 213d5aa9007243a7c8610e561763b2ed3361a356 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 24 Jan 2025 17:53:41 +0000
Subject: [PATCH 5/5] Fixups
---
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d1e2d5ab67c4cd..7d3c79f269008d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4439,7 +4439,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
- Regs.clearBitsInMask(CSRMask);
+ if (CSRMask)
+ Regs.clearBitsInMask(CSRMask);
assert(Regs.count() > 0 && "Expected scavengeable registers");
return Regs;
};
More information about the llvm-commits
mailing list