[llvm] 84a0c8e - [AArch64][SVE] Spilling/filling of SVE callee-saves.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 11 01:05:00 PST 2019
Author: Sander de Smalen
Date: 2019-11-11T09:03:19Z
New Revision: 84a0c8e3ae92829c4f04ba995b4b6283d397f65d
URL: https://github.com/llvm/llvm-project/commit/84a0c8e3ae92829c4f04ba995b4b6283d397f65d
DIFF: https://github.com/llvm/llvm-project/commit/84a0c8e3ae92829c4f04ba995b4b6283d397f65d.diff
LOG: [AArch64][SVE] Spilling/filling of SVE callee-saves.
Implement the spills/fills of callee-saved SVE registers using STR and LDR
instructions.
Also adds the `aarch64_sve_vector_pcs` attribute to specify the
callee-saved registers to be used for functions that return SVE vectors or
take SVE vectors as arguments. The callee-saved registers are vector
registers z8-z23 and predicate registers p4-p15.
The overall frame-layout with SVE will be as follows:
+-------------+
| stack args |
+-------------+
| Callee Saves|
| X29, X30 |
|-------------| <- FP
| SVE Callee | < //////////////
| saved regs | < //////////////
| z23 | < //////////////
| : | < // SCALABLE //
| z8 | < //////////////
| p15 | < /// STACK ////
| : | < //////////////
| p4 | < //// AREA ////
+-------------+ < //////////////
| : | < //////////////
| SVE locals | < //////////////
| : | < //////////////
+-------------+
|/////////////| alignment gap.
| : |
| Stack objs |
| : |
+-------------+ <- SP after call and frame-setup
Reviewers: cameron.mcinally, efriedma, greened, thegameg, ostannard, rengolin
Reviewed By: ostannard
Differential Revision: https://reviews.llvm.org/D68996
Added:
Modified:
llvm/lib/AsmParser/LLLexer.cpp
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/AsmParser/LLToken.h
llvm/lib/IR/AsmWriter.cpp
llvm/lib/Target/AArch64/AArch64CallingConvention.td
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/test/CodeGen/AArch64/framelayout-sve.mir
Removed:
################################################################################
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 887a931c64d2..847ca0430e67 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -594,6 +594,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
KEYWORD(aarch64_vector_pcs);
+ KEYWORD(aarch64_sve_vector_pcs);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
KEYWORD(avr_signalcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index e9d2dfc195b1..bb2c65f6d9a6 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1931,6 +1931,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
/// ::= 'aarch64_vector_pcs'
+/// ::= 'aarch64_sve_vector_pcs'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
/// ::= 'avr_signalcc'
@@ -1977,6 +1978,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
+ case lltok::kw_aarch64_sve_vector_pcs:
+ CC = CallingConv::AArch64_SVE_VectorCall;
+ break;
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index 9153b49aa045..9029e15af9fd 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -142,6 +142,7 @@ enum Kind {
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
kw_aarch64_vector_pcs,
+ kw_aarch64_sve_vector_pcs,
kw_msp430_intrcc,
kw_avr_intrcc,
kw_avr_signalcc,
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 107b32ea3263..5ee0d52fe995 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -364,6 +364,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
+ case CallingConv::AArch64_SVE_VectorCall:
+ Out << "aarch64_sve_vector_pcs";
+ break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 3c4121b1185e..3c179ae76f0e 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -405,10 +405,10 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
// Functions taking SVE arguments or returning an SVE type
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
-def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
- X25, X26, X27, X28, LR, FP,
- (sequence "Z%u", 8, 23),
- (sequence "P%u", 4, 15))>;
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
+ (sequence "P%u", 4, 15),
+ X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP)>;
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
@@ -486,5 +486,7 @@ def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
def CSR_AArch64_AAVPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
+def CSR_AArch64_SVE_AAPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bbd7c51fde94..afe4f1402cf3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -834,6 +834,20 @@ static bool isTargetDarwin(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}
+// Convenience function to determine whether I is an SVE callee save.
+bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_ZXI:
+ case AArch64::STR_PXI:
+ case AArch64::LDR_ZXI:
+ case AArch64::LDR_PXI:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -965,7 +979,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
MachineBasicBlock::iterator End = MBB.end();
- while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
+ while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
+ !IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
@@ -1107,7 +1122,35 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = 0;
}
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
+ StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
+
+ // Process the SVE callee-saves to determine what space needs to be
+ // allocated.
+ if (AFI->getSVECalleeSavedStackSize()) {
+ // Find callee save instructions in frame.
+ CalleeSavesBegin = MBBI;
+ assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+ while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+ ++MBBI;
+ CalleeSavesEnd = MBBI;
+
+ int64_t OffsetToFirstCalleeSaveFromSP =
+ MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
+ StackOffset OffsetToCalleeSavesFromSP =
+ StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
+ AllocateBefore -= OffsetToCalleeSavesFromSP;
+ AllocateAfter = SVEStackSize - AllocateBefore;
+ }
+
+ // Allocate space for the callee saves (if any).
+ emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
+ -AllocateBefore, TII,
+ MachineInstr::FrameSetup);
+
+ // Finally allocate remaining SVE stack space.
+ emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
+ -AllocateAfter, TII,
MachineInstr::FrameSetup);
// Allocate space for the rest of the frame.
@@ -1444,7 +1487,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
- if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
+ if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
+ IsSVECalleeSave(LastPopI)) {
++LastPopI;
break;
} else if (CombineSPBump)
@@ -1476,11 +1520,53 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+ // Process the SVE callee-saves to determine what space needs to be
+ // deallocated.
+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+ MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
+ if (AFI->getSVECalleeSavedStackSize()) {
+ RestoreBegin = std::prev(RestoreEnd);;
+ while (IsSVECalleeSave(RestoreBegin) &&
+ RestoreBegin != MBB.begin())
+ --RestoreBegin;
+ ++RestoreBegin;
+
+ assert(IsSVECalleeSave(RestoreBegin) &&
+ IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+
+ int64_t OffsetToFirstCalleeSaveFromSP =
+ MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
+ StackOffset OffsetToCalleeSavesFromSP =
+ StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
+ DeallocateBefore = OffsetToCalleeSavesFromSP;
+ DeallocateAfter = SVEStackSize - DeallocateBefore;
+ }
+
// Deallocate the SVE area.
- if (SVEStackSize)
- if (!AFI->isStackRealigned())
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
- TII, MachineInstr::FrameDestroy);
+ if (SVEStackSize) {
+ if (AFI->isStackRealigned()) {
+ if (AFI->getSVECalleeSavedStackSize())
+ // Set SP to start of SVE area, from which the callee-save reloads
+        // can be done. The code below will deallocate the stack
+        // space by moving FP -> SP.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
+ -SVEStackSize, TII, MachineInstr::FrameDestroy);
+ } else {
+ if (AFI->getSVECalleeSavedStackSize()) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
+ NumBytes = 0;
+ }
+
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy);
+
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ DeallocateAfter, TII, MachineInstr::FrameDestroy);
+ }
+ }
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
@@ -1813,11 +1899,28 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- enum RegType { GPR, FPR64, FPR128 } Type;
+ enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
RegPairInfo() = default;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
+
+ unsigned getScale() const {
+ switch (Type) {
+ case PPR:
+ return 2;
+ case GPR:
+ case FPR64:
+ return 8;
+ case ZPR:
+ case FPR128:
+ return 16;
+ default:
+ llvm_unreachable("Unsupported type");
+ }
+ }
+
+ bool isScalable() const { return Type == PPR || Type == ZPR; }
};
} // end anonymous namespace
@@ -1842,7 +1945,8 @@ static void computeCalleeSaveRegisterPairs(
CC == CallingConv::PreserveMost ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
- int Offset = AFI->getCalleeSavedStackSize();
+ int ByteOffset = AFI->getCalleeSavedStackSize();
+ int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
// On Linux, we will have either one or zero non-paired register. On Windows
// with CFI, we can have multiple unpaired registers in order to utilize the
// available unwind codes. This flag assures that the alignment fixup is done
@@ -1858,6 +1962,10 @@ static void computeCalleeSaveRegisterPairs(
RPI.Type = RegPairInfo::FPR64;
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR128;
+ else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::ZPR;
+ else if (AArch64::PPRRegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::PPR;
else
llvm_unreachable("Unsupported register class.");
@@ -1880,6 +1988,9 @@ static void computeCalleeSaveRegisterPairs(
if (AArch64::FPR128RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
+ case RegPairInfo::PPR:
+ case RegPairInfo::ZPR:
+ break;
}
}
@@ -1917,23 +2028,33 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
- Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+ int Scale = RPI.getScale();
+ if (RPI.isScalable())
+ ScalableByteOffset -= Scale;
+ else
+ ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ assert(!(RPI.isScalable() && RPI.isPaired()) &&
+ "Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
- RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
+ !RPI.isPaired()) {
FixupDone = true;
- Offset -= 8;
- assert(Offset % 16 == 0);
+ ByteOffset -= 8;
+ assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
}
+ int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(Offset % Scale == 0);
RPI.Offset = Offset / Scale;
- assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
+
+ assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
+ (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
@@ -2025,6 +2146,16 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
Size = 16;
Align = 16;
break;
+ case RegPairInfo::ZPR:
+ StrOpc = AArch64::STR_ZXI;
+ Size = 16;
+ Align = 16;
+ break;
+ case RegPairInfo::PPR:
+ StrOpc = AArch64::STR_PXI;
+ Size = 2;
+ Align = 2;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -2065,6 +2196,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+ // Update the StackIDs of the SVE stack slots.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
+ MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector);
+
}
return true;
}
@@ -2116,6 +2252,16 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
Size = 16;
Align = 16;
break;
+ case RegPairInfo::ZPR:
+ LdrOpc = AArch64::LDR_ZXI;
+ Size = 16;
+ Align = 16;
+ break;
+ case RegPairInfo::PPR:
+ LdrOpc = AArch64::LDR_PXI;
+ Size = 2;
+ Align = 2;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -2150,12 +2296,20 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
- if (ReverseCSRRestoreSeq)
- for (const RegPairInfo &RPI : reverse(RegPairs))
+
+ // SVE objects are always restored in reverse order.
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ if (RPI.isScalable())
EmitMI(RPI);
- else
+
+ if (ReverseCSRRestoreSeq) {
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ if (!RPI.isScalable())
+ EmitMI(RPI);
+ } else
for (const RegPairInfo &RPI : RegPairs)
- EmitMI(RPI);
+ if (!RPI.isScalable())
+ EmitMI(RPI);
if (NeedShadowCallStackProlog) {
// Shadow call stack epilog: ldr x30, [x18, #-8]!
@@ -2202,7 +2356,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(Reg);
bool RegUsed = SavedRegs.test(Reg);
- unsigned PairedReg = CSRegs[i ^ 1];
+ unsigned PairedReg = AArch64::NoRegister;
+ if (AArch64::GPR64RegClass.contains(Reg) ||
+ AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR128RegClass.contains(Reg))
+ PairedReg = CSRegs[i ^ 1];
+
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
@@ -2226,10 +2385,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
+ unsigned SVECSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned Reg : SavedRegs.set_bits())
- CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+ for (unsigned Reg : SavedRegs.set_bits()) {
+ auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
+ if (AArch64::PPRRegClass.contains(Reg) ||
+ AArch64::ZPRRegClass.contains(Reg))
+ SVECSStackSize += RegSize;
+ else
+ CSStackSize += RegSize;
+ }
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
@@ -2249,10 +2415,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned MaxAlign = getStackAlignment();
int64_t SVEStackSize =
- alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
- assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -2313,6 +2477,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
+ AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -2321,9 +2486,39 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
return AFI->hasCalleeSaveStackFreeSpace();
}
-int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
- unsigned &MaxAlign) const {
- // Process all fixed stack objects.
+/// returns true if there are any SVE callee saves.
+static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
+ int &Min, int &Max) {
+ if (!MFI.isCalleeSavedInfoValid())
+ return false;
+
+ Min = std::numeric_limits<int>::max();
+ Max = std::numeric_limits<int>::min();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ for (auto &CS : CSI) {
+ if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
+ AArch64::PPRRegClass.contains(CS.getReg())) {
+ assert((Max == std::numeric_limits<int>::min() ||
+ Max + 1 == CS.getFrameIdx()) &&
+ "SVE CalleeSaves are not consecutive");
+
+ Min = std::min(Min, CS.getFrameIdx());
+ Max = std::max(Max, CS.getFrameIdx());
+ }
+ }
+ return Min != std::numeric_limits<int>::max();
+}
+
+// Process all the SVE stack objects and determine offsets for each
+// object. If AssignOffsets is true, the offsets get assigned.
+// Fills in the first and last callee-saved frame indices into
+// Min/MaxCSFrameIndex, respectively.
+// Returns the size of the stack.
+static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
+ int &MinCSFrameIndex,
+ int &MaxCSFrameIndex,
+ bool AssignOffsets) {
+ // First process all fixed stack objects.
int64_t Offset = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
@@ -2332,12 +2527,41 @@ int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
Offset = FixedOffset;
}
+ // Then process all callee saved slots.
+ if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
+ // Make sure to align the last callee save slot.
+ MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
+
+ // Assign offsets to the callee save slots.
+ for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
+ Offset += MFI.getObjectSize(I);
+ Offset = alignTo(Offset, MFI.getObjectAlignment(I));
+ if (AssignOffsets) {
+ LLVM_DEBUG(dbgs() << "alloc FI(" << I << ") at SP[" << Offset
+ << "]\n");
+ MFI.setObjectOffset(I, -Offset);
+ }
+ }
+ }
+
// Note: We don't take allocatable stack objects into
// account yet, because allocation for those is not yet
// implemented.
return Offset;
}
+int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
+ MachineFrameInfo &MFI) const {
+ int MinCSFrameIndex, MaxCSFrameIndex;
+ return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
+}
+
+int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
+ MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
+ return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
+ true);
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2345,12 +2569,13 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- unsigned MaxAlign = getStackAlignment();
- int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
+ int MinCSFrameIndex, MaxCSFrameIndex;
+ int64_t SVEStackSize =
+ assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
- assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
+ AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index ac150e86c9eb..f84847def34d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -101,7 +101,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
- int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
+
+ int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
+ int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
+ int &MinCSFrameIndex,
+ int &MaxCSFrameIndex) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9705a1b94615..2a3b3a3ac2f8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3118,6 +3118,9 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
+ case CallingConv::AArch64_SVE_VectorCall:
+ // Calling SVE functions is currently not yet supported.
+ report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 32661860934a..dc9ca277b47f 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -53,8 +53,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Amount of stack frame size, not including callee-saved registers.
unsigned LocalStackSize;
+ /// The start and end frame indices for the SVE callee saves.
+ int MinSVECSFrameIndex;
+ int MaxSVECSFrameIndex;
+
/// Amount of stack frame size used for saving callee-saved registers.
unsigned CalleeSavedStackSize;
+ unsigned SVECalleeSavedStackSize;
bool HasCalleeSavedStackSize = false;
/// Number of TLS accesses using the special (combinable)
@@ -161,7 +166,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
void setCalleeSaveStackHasFreeSpace(bool s) {
CalleeSaveStackHasFreeSpace = s;
}
-
bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
@@ -218,6 +222,22 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
return CalleeSavedStackSize;
}
+ // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
+ void setSVECalleeSavedStackSize(unsigned Size) {
+ SVECalleeSavedStackSize = Size;
+ }
+ unsigned getSVECalleeSavedStackSize() const {
+ return SVECalleeSavedStackSize;
+ }
+
+ void setMinMaxSVECSFrameIndex(int Min, int Max) {
+ MinSVECSFrameIndex = Min;
+ MaxSVECSFrameIndex = Max;
+ }
+
+ int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
+ int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
+
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
return NumLocalDynamicTLSAccesses;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 918e89fd8868..47ccef5ed83f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -55,6 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_AArch64_AAVPCS_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
+ return CSR_AArch64_SVE_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@@ -125,7 +127,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::AArch64_VectorCall)
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (CC == CallingConv::AArch64_SVE_VectorCall)
- return CSR_AArch64_SVE_AAPCS_RegMask;
+ return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
+ : CSR_AArch64_SVE_AAPCS_RegMask;
if (CC == CallingConv::CFGuard_Check)
return CSR_Win_AArch64_CFGuard_Check_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 452ae94e2456..18d6796b172c 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -30,6 +30,10 @@
define void @test_address_sve_fp() nounwind { entry: unreachable }
define void @test_stack_arg_sve() nounwind { entry: unreachable }
define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable }
...
# +----------+
@@ -328,3 +332,183 @@ body: |
RET_ReallyLR
---
+...
+# CHECK-LABEL: name: save_restore_pregs_sve
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
+# CHECK: frame-setup STR_PXI killed $p6, $sp, 5
+# CHECK: frame-setup STR_PXI killed $p5, $sp, 6
+# CHECK: frame-setup STR_PXI killed $p4, $sp, 7
+# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+
+# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
+# CHECK: $p6 = frame-destroy LDR_PXI $sp, 5
+# CHECK: $p5 = frame-destroy LDR_PXI $sp, 6
+# CHECK: $p4 = frame-destroy LDR_PXI $sp, 7
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1
+# CHECK: RET_ReallyLR
+name: save_restore_pregs_sve
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
+...
+# CHECK-LABEL: name: save_restore_zregs_sve
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK: frame-setup STR_ZXI killed $z10, $sp, 0
+# CHECK: frame-setup STR_ZXI killed $z9, $sp, 1
+# CHECK: frame-setup STR_ZXI killed $z8, $sp, 2
+# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+
+# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
+# CHECK: $z10 = frame-destroy LDR_ZXI $sp, 0
+# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 1
+# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 2
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 3
+# CHECK: RET_ReallyLR
+name: save_restore_zregs_sve
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $z8 = IMPLICIT_DEF
+ $z9 = IMPLICIT_DEF
+ $z10 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
+...
+# Test allocation/deallocation of the stack frame together with the
+# saving/restoring of callee save registers. Fixed-stack objects
+# are allocated before the callee-saves.
+# This also adds some non-SVE callee-saves, to ensure that those are
+# paired correctly.
+#
+# CHECK-LABEL: name: save_restore_sve
+# CHECK: $sp = frame-setup STPXpre killed ${{[a-z0-9]+}}, killed $x21, $sp, -4
+# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 2
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -19
+# CHECK: frame-setup STR_PXI killed $p15, $sp, 4
+# CHECK: frame-setup STR_PXI killed $p14, $sp, 5
+# CHECK: frame-setup STR_PXI killed $p5, $sp, 14
+# CHECK: frame-setup STR_PXI killed $p4, $sp, 15
+# CHECK: frame-setup STR_ZXI killed $z23, $sp, 2
+# CHECK: frame-setup STR_ZXI killed $z22, $sp, 3
+# CHECK: frame-setup STR_ZXI killed $z9, $sp, 16
+# CHECK: frame-setup STR_ZXI killed $z8, $sp, 17
+# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+
+# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
+# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
+# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
+# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
+# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
+# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
+# CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3
+# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
+# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 19
+# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2
+# CHECK: $sp, ${{[a-z0-9]+}}, $x21 = frame-destroy LDPXpost $sp, 4
+# CHECK: RET_ReallyLR
+name: save_restore_sve
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $x19 = IMPLICIT_DEF
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
+...
+# Test allocation/deallocation of the stack frame together with the
+# saving/restoring of callee save registers. Fixed-stack objects
+# are allocated before the callee-saves.
+#
+# CHECK-LABEL: name: save_restore_sve_realign
+# CHECK: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -19
+# CHECK-NEXT: STR_PXI killed $p15, $sp, 4
+# CHECK-NEXT: STR_PXI killed $p14, $sp, 5
+# CHECK: STR_PXI killed $p5, $sp, 14
+# CHECK-NEXT: STR_PXI killed $p4, $sp, 15
+# CHECK-NEXT: STR_ZXI killed $z23, $sp, 2
+# CHECK-NEXT: STR_ZXI killed $z22, $sp, 3
+# CHECK: STR_ZXI killed $z9, $sp, 16
+# CHECK-NEXT: STR_ZXI killed $z8, $sp, 17
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+
+# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -19
+# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
+# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
+# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
+# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15
+# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2
+# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
+# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
+# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
+# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
+# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: save_restore_sve_realign
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 32 }
+body: |
+ bb.0.entry:
+
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
More information about the llvm-commits
mailing list