[llvm-branch-commits] [llvm] release/20.x: [AArch64][SME] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible (PR #126503)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 10 03:50:18 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
This cherry-picks commits: 82c6b8f7bbebc32751170267bbb7712f028cf587 and e470dcae8d2c4138a89974ceeb413b1568d3a112.
These are needed for the `-aarch64-enable-zpr-predicate-spills` flag. This is an off-by-default flag that converts predicate spills to data vector spills in streaming[-compatible] functions.
We think this should be fairly low risk as this feature needs to be manually enabled, but we'd like it to be available for users to experiment with in LLVM 20.
---
Patch is 82.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126503.diff
11 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+325-5)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+15-1)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+2-2)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.h (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.td (+10-1)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+19)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+2)
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+14)
- (added) llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir (+1013)
- (modified) llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll (+12-1)
- (modified) llvm/utils/TableGen/SubtargetEmitter.cpp (+21-1)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a082a1ebe95bf84..d3abd79b85a75f7 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1634,6 +1634,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_PXI:
+ case AArch64::PTRUE_B:
+ case AArch64::CPY_ZPzI_B:
+ case AArch64::CMPNE_PPzZI_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
@@ -3265,7 +3268,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
break;
case RegPairInfo::PPR:
- StrOpc = AArch64::STR_PXI;
+ StrOpc =
+ Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI;
break;
case RegPairInfo::VG:
StrOpc = AArch64::STRXui;
@@ -3494,7 +3498,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
break;
case RegPairInfo::PPR:
- LdrOpc = AArch64::LDR_PXI;
+ LdrOpc = Size == 16 ? AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO
+ : AArch64::LDR_PXI;
break;
case RegPairInfo::VG:
continue;
@@ -3720,6 +3725,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
continue;
}
+ // Always save P4 when PPR spills are ZPR-sized and a predicate above p8 is
+ // spilled. If all of p0-p3 are used as return values p4 must be free
+ // to reload p8-p15.
+ if (RegInfo->getSpillSize(AArch64::PPRRegClass) == 16 &&
+ AArch64::PPR_p8to15RegClass.contains(Reg)) {
+ SavedRegs.set(AArch64::P4);
+ }
+
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
@@ -4159,8 +4172,312 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
true);
}
+/// Attempts to scavenge a register from \p ScavengeableRegs given the used
+/// registers in \p UsedRegs.
+static Register tryScavengeRegister(LiveRegUnits const &UsedRegs,
+ BitVector const &ScavengeableRegs,
+ Register PreferredReg) {
+ if (PreferredReg != AArch64::NoRegister && UsedRegs.available(PreferredReg))
+ return PreferredReg;
+ for (auto Reg : ScavengeableRegs.set_bits()) {
+ if (UsedRegs.available(Reg))
+ return Reg;
+ }
+ return AArch64::NoRegister;
+}
+
+/// Propagates frame-setup/destroy flags from \p SourceMI to all instructions in
+/// \p MachineInstrs.
+static void propagateFrameFlags(MachineInstr &SourceMI,
+ ArrayRef<MachineInstr *> MachineInstrs) {
+ for (MachineInstr *MI : MachineInstrs) {
+ if (SourceMI.getFlag(MachineInstr::FrameSetup))
+ MI->setFlag(MachineInstr::FrameSetup);
+ if (SourceMI.getFlag(MachineInstr::FrameDestroy))
+ MI->setFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+/// RAII helper class for scavenging or spilling a register. On construction
+/// attempts to find a free register of class \p RC (given \p UsedRegs and \p
+/// AllocatableRegs), if no register can be found spills \p SpillCandidate to \p
+/// MaybeSpillFI to free a register. The free'd register is returned via the \p
+/// FreeReg output parameter. On destruction, if there is a spill, its previous
+/// value is reloaded. The spilling and scavenging is only valid at the
+/// insertion point \p MBBI, this class should _not_ be used in places that
+/// create or manipulate basic blocks, moving the expected insertion point.
+struct ScopedScavengeOrSpill {
+ ScopedScavengeOrSpill(const ScopedScavengeOrSpill &) = delete;
+ ScopedScavengeOrSpill(ScopedScavengeOrSpill &&) = delete;
+
+ ScopedScavengeOrSpill(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ Register SpillCandidate, const TargetRegisterClass &RC,
+ LiveRegUnits const &UsedRegs,
+ BitVector const &AllocatableRegs,
+ std::optional<int> *MaybeSpillFI,
+ Register PreferredReg = AArch64::NoRegister)
+ : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
+ *MF.getSubtarget().getInstrInfo())),
+ TRI(*MF.getSubtarget().getRegisterInfo()) {
+ FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs, PreferredReg);
+ if (FreeReg != AArch64::NoRegister)
+ return;
+ assert(MaybeSpillFI && "Expected emergency spill slot FI information "
+ "(attempted to spill in prologue/epilogue?)");
+ if (!MaybeSpillFI->has_value()) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ *MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlign(RC));
+ }
+ FreeReg = SpillCandidate;
+ SpillFI = MaybeSpillFI->value();
+ TII.storeRegToStackSlot(MBB, MBBI, FreeReg, false, *SpillFI, &RC, &TRI,
+ Register());
+ }
+
+ bool hasSpilled() const { return SpillFI.has_value(); }
+
+ /// Returns the free register (found from scavenging or spilling a register).
+ Register freeRegister() const { return FreeReg; }
+
+ Register operator*() const { return freeRegister(); }
+
+ ~ScopedScavengeOrSpill() {
+ if (hasSpilled())
+ TII.loadRegFromStackSlot(MBB, MBBI, FreeReg, *SpillFI, &RC, &TRI,
+ Register());
+ }
+
+private:
+ MachineBasicBlock &MBB;
+ MachineBasicBlock::iterator MBBI;
+ const TargetRegisterClass &RC;
+ const AArch64InstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ Register FreeReg = AArch64::NoRegister;
+ std::optional<int> SpillFI;
+};
+
+/// Emergency stack slots for expanding SPILL_PPR_TO_ZPR_SLOT_PSEUDO and
+/// FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+struct EmergencyStackSlots {
+ std::optional<int> ZPRSpillFI;
+ std::optional<int> PPRSpillFI;
+ std::optional<int> GPRSpillFI;
+};
+
+/// Registers available for scavenging (ZPR, PPR3b, GPR).
+struct ScavengeableRegs {
+ BitVector ZPRRegs;
+ BitVector PPR3bRegs;
+ BitVector GPRRegs;
+};
+
+static bool isInPrologueOrEpilogue(const MachineInstr &MI) {
+ return MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy);
+}
+
+/// Expands:
+/// ```
+/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0
+/// ```
+/// To:
+/// ```
+/// $z0 = CPY_ZPzI_B $p0, 1, 0
+/// STR_ZXI $z0, %stack.0, 0
+/// ```
+/// While ensuring a ZPR ($z0 in this example) is free for the predicate (
+/// spilling if necessary).
+static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
+ MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs,
+ ScavengeableRegs const &SR,
+ EmergencyStackSlots &SpillSlots) {
+ MachineFunction &MF = *MBB.getParent();
+ auto *TII =
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ ScopedScavengeOrSpill ZPredReg(
+ MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
+
+ SmallVector<MachineInstr *, 2> MachineInstrs;
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::CPY_ZPzI_B))
+ .addReg(*ZPredReg, RegState::Define)
+ .add(MI.getOperand(0))
+ .addImm(1)
+ .addImm(0)
+ .getInstr());
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::STR_ZXI))
+ .addReg(*ZPredReg)
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .setMemRefs(MI.memoperands())
+ .getInstr());
+ propagateFrameFlags(MI, MachineInstrs);
+}
+
+/// Expands:
+/// ```
+/// $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0
+/// ```
+/// To:
+/// ```
+/// $z0 = LDR_ZXI %stack.0, 0
+/// $p0 = PTRUE_B 31, implicit $vg
+/// $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+/// ```
+/// While ensuring a ZPR ($z0 in this example) is free for the predicate (
+/// spilling if necessary). If the status flags are in use at the point of
+/// expansion they are preserved (by moving them to/from a GPR). This may cause
+/// an additional spill if no GPR is free at the expansion point.
+static bool expandFillPPRFromZPRSlotPseudo(
+ MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs, ScavengeableRegs const &SR,
+ MachineInstr *&LastPTrue, EmergencyStackSlots &SpillSlots) {
+ MachineFunction &MF = *MBB.getParent();
+ auto *TII =
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ ScopedScavengeOrSpill ZPredReg(
+ MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
+
+ ScopedScavengeOrSpill PredReg(
+ MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI,
+ /*PreferredReg=*/
+ LastPTrue ? LastPTrue->getOperand(0).getReg() : AArch64::NoRegister);
+
+ // Elide NZCV spills if we know it is not used.
+ bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
+ std::optional<ScopedScavengeOrSpill> NZCVSaveReg;
+ if (IsNZCVUsed)
+ NZCVSaveReg.emplace(
+ MF, MBB, MI, AArch64::X0, AArch64::GPR64RegClass, UsedRegs, SR.GPRRegs,
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI);
+ SmallVector<MachineInstr *, 4> MachineInstrs;
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI))
+ .addReg(*ZPredReg, RegState::Define)
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .setMemRefs(MI.memoperands())
+ .getInstr());
+ if (IsNZCVUsed)
+ MachineInstrs.push_back(
+ BuildMI(MBB, MI, DL, TII->get(AArch64::MRS))
+ .addReg(NZCVSaveReg->freeRegister(), RegState::Define)
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(AArch64::NZCV, RegState::Implicit)
+ .getInstr());
+
+ // Reuse previous ptrue if we know it has not been clobbered.
+ if (LastPTrue) {
+ assert(*PredReg == LastPTrue->getOperand(0).getReg());
+ LastPTrue->moveBefore(&MI);
+ } else {
+ LastPTrue = BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
+ .addReg(*PredReg, RegState::Define)
+ .addImm(31);
+ }
+ MachineInstrs.push_back(LastPTrue);
+ MachineInstrs.push_back(
+ BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
+ .addReg(MI.getOperand(0).getReg(), RegState::Define)
+ .addReg(*PredReg)
+ .addReg(*ZPredReg)
+ .addImm(0)
+ .addReg(AArch64::NZCV, RegState::ImplicitDefine)
+ .getInstr());
+ if (IsNZCVUsed)
+ MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MSR))
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(NZCVSaveReg->freeRegister())
+ .addReg(AArch64::NZCV, RegState::ImplicitDefine)
+ .getInstr());
+
+ propagateFrameFlags(MI, MachineInstrs);
+ return PredReg.hasSpilled();
+}
+
+/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
+/// operations within the MachineBasicBlock \p MBB.
+static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI,
+ ScavengeableRegs const &SR,
+ EmergencyStackSlots &SpillSlots) {
+ LiveRegUnits UsedRegs(TRI);
+ UsedRegs.addLiveOuts(MBB);
+ bool HasPPRSpills = false;
+ MachineInstr *LastPTrue = nullptr;
+ for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+ UsedRegs.stepBackward(MI);
+ switch (MI.getOpcode()) {
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+ if (LastPTrue &&
+ MI.definesRegister(LastPTrue->getOperand(0).getReg(), &TRI))
+ LastPTrue = nullptr;
+ HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR,
+ LastPTrue, SpillSlots);
+ MI.eraseFromParent();
+ break;
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots);
+ MI.eraseFromParent();
+ [[fallthrough]];
+ default:
+ LastPTrue = nullptr;
+ break;
+ }
+ }
+
+ return HasPPRSpills;
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
+
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ const TargetSubtargetInfo &TSI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *TSI.getRegisterInfo();
+
+ // If predicate spills are 16 bytes we may need to expand
+ // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
+ if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) {
+ auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
+ BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
+ assert(Regs.count() > 0 && "Expected scavengeable registers");
+ return Regs;
+ };
+
+ ScavengeableRegs SR{};
+ SR.ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
+ // Only p0-7 are possible as the second operand of cmpne (needed for fills).
+ SR.PPR3bRegs = ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
+ SR.GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
+
+ EmergencyStackSlots SpillSlots;
+ for (MachineBasicBlock &MBB : MF) {
+ // In the case we had to spill a predicate (in the range p0-p7) to reload
+ // a predicate (>= p8), additional spill/fill pseudos will be created.
+ // These need an additional expansion pass. Note: There will only be at
+ // most two expansion passes, as spilling/filling a predicate in the range
+ // p0-p7 never requires spilling another predicate.
+ for (int Pass = 0; Pass < 2; Pass++) {
+ bool HasPPRSpills =
+ expandSMEPPRToZPRSpillPseudos(MBB, TRI, SR, SpillSlots);
+ assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
+ if (!HasPPRSpills)
+ break;
+ }
+ }
+ }
+
MachineFrameInfo &MFI = MF.getFrameInfo();
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
@@ -4170,7 +4487,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
int64_t SVEStackSize =
assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
@@ -5204,9 +5520,13 @@ void AArch64FrameLowering::emitRemarks(
unsigned RegTy = StackAccess::AccessType::GPR;
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
- if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+ // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
+ // spill/fill the predicate as a data vector (so are an FPR access).
+ if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
+ MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) {
RegTy = StackAccess::PPR;
- else
+ } else
RegTy = StackAccess::FPR;
} else if (AArch64InstrInfo::isFpOrNEON(MI)) {
RegTy = StackAccess::FPR;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 17dd8a073eff0f9..0f2b969fba35c7c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -81,7 +81,7 @@ static cl::opt<unsigned>
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
AArch64::CATCHRET),
- RI(STI.getTargetTriple()), Subtarget(STI) {}
+ RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -2438,6 +2438,8 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STZ2Gi:
case AArch64::STZGi:
case AArch64::TAGPstack:
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
return 2;
case AArch64::LD1B_D_IMM:
case AArch64::LD1B_H_IMM:
@@ -4223,6 +4225,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -256;
MaxOffset = 254;
break;
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
@@ -5355,6 +5359,11 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected predicate store without SVE store instructions");
+ Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -5527,6 +5536,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected predicate load without SVE load instructions");
+ Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 5973b63b5a80243..e9730348ba58e5b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -38,8 +38,8 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
-AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
- : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode)
+ : AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) {
AArch64_MC::initLLVMToCVRegMapping(this);
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 11da624af4881b4..898a509f75908f8 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -27,7 +27,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
const Triple &TT;
public:
- AArch64RegisterInfo(const Triple &TT);
+ AArch64RegisterInfo(const Triple &TT, unsigned HwMode);
// FIXM...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/126503
More information about the llvm-branch-commits
mailing list