[llvm] [ARM] Optimise non-ABI frame pointers (PR #110286)
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 08:30:47 PDT 2024
https://github.com/ostannard created https://github.com/llvm/llvm-project/pull/110286
With -fomit-frame-pointer, even if we set up a frame pointer for other
reasons (e.g. variable-sized or over-aligned stack allocations), we
don't need to create an ABI-compliant frame record. This means that we
can save all of the general-purpose registers in one push, instead of
splitting it to ensure that the frame pointer and link register are
adjacent on the stack, saving two instructions per function.
The first 7 patches here are also included in #110283 and #110285, so the NFC and bug-fix parts can be reviewed separately.
>From 02f008ae83e16131b58b75f4befef2be4bcedbc4 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 25 Sep 2024 17:14:18 +0100
Subject: [PATCH 1/9] [ARM] Tidy up stack frame strategy code (NFC)
We have two different ways of splitting the pushes of callee-saved
registers onto the stack, controlled by the confusingly similar names
STI.splitFramePushPop() and STI.splitFramePointerPush(). This removes
those functions and replaces them with a single function which returns
an enum. This is in preparation for adding another value to that enum.
The original work of this patch was done by James Westwood, reviewed as
#82801 and #81249, with some tidy-ups done by Mark Murray and myself.
---
llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 24 ++++---
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 71 +++++++++++++--------
llvm/lib/Target/ARM/ARMSubtarget.cpp | 33 ++++++++--
llvm/lib/Target/ARM/ARMSubtarget.h | 41 ++++++++----
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 6 +-
5 files changed, 120 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c149db3144c7c2..9bb1e9754217a1 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,27 +62,30 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
- bool UseSplitPush = STI.splitFramePushPop(*MF);
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(*MF);
const Function &F = MF->getFunction();
if (F.getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
- } else if (STI.splitFramePointerPush(*MF)) {
+ } else if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
return CSR_Win_SplitFP_SaveList;
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
- return STI.isTargetDarwin()
- ? CSR_iOS_SwiftTail_SaveList
- : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
- : CSR_AAPCS_SwiftTail_SaveList);
+ return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList
+ : (PushPopSplit == ARMSubtarget::SplitR7
+ ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
+ : CSR_AAPCS_SwiftTail_SaveList);
} else if (F.hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
- return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+ return PushPopSplit == ARMSubtarget::SplitR7
+ ? CSR_ATPCS_SplitPush_SaveList
+ : CSR_AAPCS_SaveList;
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
@@ -99,8 +102,9 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isTargetDarwin())
return CSR_iOS_SwiftError_SaveList;
- return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
- CSR_AAPCS_SwiftError_SaveList;
+ return PushPopSplit == ARMSubtarget::SplitR7
+ ? CSR_ATPCS_SplitPush_SwiftError_SaveList
+ : CSR_AAPCS_SwiftError_SaveList;
}
if (STI.isTargetDarwin() && F.getCallingConv() == CallingConv::CXX_FAST_TLS)
@@ -111,7 +115,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isTargetDarwin())
return CSR_iOS_SaveList;
- if (UseSplitPush)
+ if (PushPopSplit == ARMSubtarget::SplitR7)
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
: CSR_ATPCS_SplitPush_SaveList;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 40354f99559896..2df9756b9cd753 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -713,6 +713,8 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
const MachineFunction &MF) {
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
@@ -720,9 +722,8 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
int MaxRegBytes = 8 * 4;
- if (STI.splitFramePointerPush(MF)) {
- // Here, r11 can be stored below all of r4-r15 (3 registers more than
- // above), plus d8-d15.
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
+ // Here, r11 can be stored below all of r4-r15 plus d8-d15.
MaxRegBytes = 11 * 4 + 8 * 8;
}
int FPCXTSaveSize =
@@ -749,6 +750,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int FPCXTSaveSize = 0;
bool NeedsWinCFI = needsWinCFI(MF);
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -789,7 +792,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
// Determine spill area sizes.
- if (STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
@@ -835,7 +838,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR7) {
GPRCS2Size += 4;
break;
}
@@ -898,13 +901,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
- if (!STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
DPRGapSize += GPRCS2Size;
}
DPRGapSize %= DPRAlign.value();
unsigned DPRCSOffset;
- if (STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
GPRCS2Offset = DPRCSOffset - GPRCS2Size;
} else {
@@ -923,8 +926,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
- // Move past area 2.
- if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
+ // Move past GPRCS2, unless using SplitR11WindowsSEH, in which case it will
+ // be after DPRCS1.
+ if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
}
@@ -943,7 +947,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // Move past area 3.
+ // Move past DPRCS1.
if (DPRCSSize > 0) {
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
@@ -964,13 +968,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else
NumBytes = DPRCSOffset;
- if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
+ // Move past GPRCS2, if using SplitR11WindowsSEH.
+ if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
}
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
- if (STI.splitFramePointerPush(MF) && HasFP)
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
NeedsWinCFIStackAlloc = false;
if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
@@ -1075,7 +1080,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AfterPush = std::next(GPRCS1Push);
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
int FPOffset = PushSize + FramePtrOffsetInPush;
- if (STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
AfterPush = std::next(GPRCS2Push);
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
@@ -1107,7 +1112,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// instructions below don't need to be replayed to unwind the stack.
if (NeedsWinCFI && MBBI != MBB.begin()) {
MachineBasicBlock::iterator End = MBBI;
- if (HasFP && STI.splitFramePointerPush(MF))
+ if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
End = AfterPush;
insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
@@ -1130,7 +1135,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop(MF))
+ if (PushPopSplit == ARMSubtarget::SplitR7)
break;
[[fallthrough]];
case ARM::R0:
@@ -1163,7 +1168,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR7) {
unsigned DwarfReg = MRI->getDwarfRegNum(
Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
int64_t Offset = MFI.getObjectOffset(FI);
@@ -1280,6 +1285,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
assert(!AFI->isThumb1OnlyFunction() &&
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// Amount of stack space we reserved next to incoming args for either
// varargs registers or stack arguments in tail calls made by this function.
@@ -1383,7 +1390,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
// Increment past our save areas.
- if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
+ if (AFI->getGPRCalleeSavedArea2Size() &&
+ PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
MBBI++;
if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
@@ -1400,7 +1408,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
}
- if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
+ if (AFI->getGPRCalleeSavedArea2Size() &&
+ PushPopSplit != ARMSubtarget::SplitR11WindowsSEH)
MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
@@ -1529,6 +1538,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
DebugLoc DL;
@@ -1540,7 +1551,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
Register Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
+ if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
+ continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1613,6 +1625,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool isInterrupt = false;
bool isTrap = false;
bool isCmseEntry = false;
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
if (MBB.end() != MI) {
DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
@@ -1635,7 +1649,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (; i != 0; --i) {
CalleeSavedInfo &Info = CSI[i-1];
Register Reg = Info.getReg();
- if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
+ if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
+ continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1643,7 +1658,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
- !STI.splitFramePointerPush(MF)) {
+ PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
@@ -1983,6 +1998,8 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
unsigned PushOneOpc = AFI->isThumbFunction() ?
@@ -2004,7 +2021,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
.addImm(-4)
.add(predOps(ARMCC::AL));
}
- if (STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
&isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
@@ -2039,6 +2056,8 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// The emitPopInst calls below do not insert reloads for the aligned DPRCS2
// registers. Do that here instead.
@@ -2049,7 +2068,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
unsigned LdrOpc =
AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- if (STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isSplitFPArea2Register, 0);
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
@@ -2287,6 +2306,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
(void)TRI; // Silence unused warning in non-assert builds.
Register FramePtr = RegInfo->getFrameRegister(MF);
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register. Also spill R4 if Thumb2 function has varsized objects,
@@ -2365,7 +2386,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.splitFramePushPop(MF)) {
+ if (PushPopSplit != ARMSubtarget::SplitR7) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -2387,7 +2408,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
break;
}
} else {
- if (!STI.splitFramePushPop(MF)) {
+ if (PushPopSplit != ARMSubtarget::SplitR7) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f9d822873bb004..c4a782bc40910a 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -485,11 +485,34 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
ARM::GPRRegClass.contains(PhysReg);
}
-bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
+ARMSubtarget::PushPopSplitVariation
+ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
- if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
- !F.needsUnwindTableEntry())
- return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
- return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
+ const std::vector<CalleeSavedInfo> CSI =
+ MF.getFrameInfo().getCalleeSavedInfo();
+
+ // Returns SplitR7 if the frame setup must be split into two separate pushes
+ // of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
+ // always required on Thumb1-only targets, as the push and pop instructions
+ // can't access the high registers. This is also required when R7 is the frame
+ // pointer and frame pointer elimination is disabled, or branch signing is
+ // enabled and AAPCS is disabled.
+ if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+ !createAAPCSFrameChain()) ||
+ (getFramePointerReg() == ARM::R7 &&
+ MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+ isThumb1Only())
+ return SplitR7;
+
+ // Returns SplitR11WindowsSEH when the stack pointer needs to be
+ // restored from the frame pointer r11 + an offset and Windows CFI is enabled.
+ // This stack unwinding cannot be expressed with SEH unwind opcodes when done
+ // with a single push, making it necessary to split the push into r4-r10, and
+ // another containing r11+lr.
+ if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry() &&
+ (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
+ return SplitR11WindowsSEH;
+ return NoSplit;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 1ca5bd4a620c4c..bea3e1441eaaed 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -81,6 +81,32 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
SingleIssuePlusExtras,
};
+ /// How the push and pop instructions of callee saved general-purpose
+ /// registers should be split.
+ enum PushPopSplitVariation {
+ /// All GPRs can be pushed in a single instruction.
+ /// push {r0-r12, lr}
+ /// vpush {d8-d15}
+ NoSplit,
+
+ /// R7 and LR must be adjacent, because R7 is the frame pointer, and must
+ /// point to a frame record consisting of the previous frame pointer and the
+ /// return address.
+ /// push {r0-r7, lr}
+ /// push {r8-r12}
+ /// vpush {d8-d15}
+ SplitR7,
+
+ /// When the stack frame size is not known (because of variable-sized
+ /// objects or realignment), Windows SEH requires the callee-saved registers
+ /// to be stored in three regions, with R11 and LR below the floating-point
+ /// registers.
+ /// push {r0-r10, r12}
+ /// vpush {d8-d15}
+ /// push {r11, lr}
+ SplitR11WindowsSEH,
+ };
+
protected:
// Bool members corresponding to the SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
@@ -371,19 +397,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return ARM::R11;
}
- /// Returns true if the frame setup is split into two separate pushes (first
- /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
- /// to lr. This is always required on Thumb1-only targets, as the push and
- /// pop instructions can't access the high registers.
- bool splitFramePushPop(const MachineFunction &MF) const {
- if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress())
- return true;
- return (getFramePointerReg() == ARM::R7 &&
- MF.getTarget().Options.DisableFramePointerElim(MF)) ||
- isThumb1Only();
- }
-
- bool splitFramePointerPush(const MachineFunction &MF) const;
+ enum PushPopSplitVariation
+ getPushPopSplitVariation(const MachineFunction &MF) const;
bool useStride4VFPs() const;
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index cb9ded7dee57b9..ad39539232a8ca 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -160,6 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -221,7 +223,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R8:
case ARM::R9:
case ARM::R10:
- if (STI.splitFramePushPop(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR7) {
GPRCS2Size += 4;
break;
}
@@ -365,7 +367,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop(MF))
+ if (PushPopSplit == ARMSubtarget::SplitR7)
break;
[[fallthrough]];
case ARM::R0:
>From a48e68623503138ac88d3d0fb45e0000d663335a Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 25 Sep 2024 16:17:17 +0100
Subject: [PATCH 2/9] [ARM] Factor out code to determine spill areas (NFC)
There were multiple loops in ARMFrameLowering which sort the callee
saved registers into spill areas, which were hard to understand and
modify. This splits the information about which register is in which
save area into a separate function.
---
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 294 ++++++++++++-----------
1 file changed, 150 insertions(+), 144 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 2df9756b9cd753..6307b4f4b3bb34 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -173,6 +173,95 @@ static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
+enum class SpillArea {
+ GPRCS1,
+ GPRCS2,
+ DPRCS1,
+ DPRCS2,
+ FPCXT,
+};
+
+/// Get the spill area that Reg should be saved into in the prologue.
+SpillArea getSpillArea(Register Reg,
+ ARMSubtarget::PushPopSplitVariation Variation,
+ unsigned NumAlignedDPRCS2Regs,
+ const ARMBaseRegisterInfo *RegInfo) {
+ // NoSplit:
+ // push {r0-r12, lr} GPRCS1
+ // vpush {d8-d15} DPRCS1
+ //
+ // SplitR7:
+ // push {r0-r7, lr} GPRCS1
+ // push {r8-r12} GPRCS2
+ // vpush {d8-d15} DPRCS1
+ //
+ // SplitR11WindowsSEH:
+ // push {r0-r10, r12} GPRCS1
+ // vpush {d8-d15} DPRCS1
+ // push {r11, lr} GPRCS2
+
+ // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
+ // the top of the stack frame.
+ // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
+ // of SP. If used, it will be below the other save areas, after the stack has
+ // been re-aligned.
+
+ switch (Reg) {
+ default:
+ dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
+ llvm_unreachable("Don't know where to spill this register");
+ break;
+
+ case ARM::FPCXTNS:
+ return SpillArea::FPCXT;
+
+ case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3:
+ case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+ return SpillArea::GPRCS1;
+
+ case ARM::R8: case ARM::R9: case ARM::R10:
+ if (Variation == ARMSubtarget::SplitR7)
+ return SpillArea::GPRCS2;
+ else
+ return SpillArea::GPRCS1;
+
+ case ARM::R11:
+ if (Variation == ARMSubtarget::NoSplit)
+ return SpillArea::GPRCS1;
+ else
+ return SpillArea::GPRCS2;
+
+ case ARM::R12:
+ if (Variation == ARMSubtarget::SplitR7)
+ return SpillArea::GPRCS2;
+ else
+ return SpillArea::GPRCS1;
+
+ case ARM::LR:
+ if (Variation == ARMSubtarget::SplitR11WindowsSEH)
+ return SpillArea::GPRCS2;
+ else
+ return SpillArea::GPRCS1;
+
+ case ARM::D0: case ARM::D1: case ARM::D2: case ARM::D3:
+ case ARM::D4: case ARM::D5: case ARM::D6: case ARM::D7:
+ return SpillArea::DPRCS1;
+
+ case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+ case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ return SpillArea::DPRCS2;
+ else
+ return SpillArea::DPRCS1;
+
+ case ARM::D16: case ARM::D17: case ARM::D18: case ARM::D19:
+ case ARM::D20: case ARM::D21: case ARM::D22: case ARM::D23:
+ case ARM::D24: case ARM::D25: case ARM::D26: case ARM::D27:
+ case ARM::D28: case ARM::D29: case ARM::D30: case ARM::D31:
+ return SpillArea::DPRCS1;
+ }
+}
+
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
STI(sti) {}
@@ -791,81 +880,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- // Determine spill area sizes.
- if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R11:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- GPRCS2Size += 4;
- break;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R12:
- GPRCS1Size += 4;
- break;
- case ARM::FPCXTNS:
- FPCXTSaveSize = 4;
- break;
- default:
- // This is a DPR. Exclude the aligned DPRCS2 spills.
- if (Reg == ARM::D8)
- D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
- DPRCSSize += 8;
- }
- }
- } else {
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (PushPopSplit == ARMSubtarget::SplitR7) {
- GPRCS2Size += 4;
- break;
- }
- [[fallthrough]];
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- GPRCS1Size += 4;
- break;
- case ARM::FPCXTNS:
- FPCXTSaveSize = 4;
- break;
- default:
- // This is a DPR. Exclude the aligned DPRCS2 spills.
- if (Reg == ARM::D8)
- D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
- DPRCSSize += 8;
- }
+ // Determine spill area sizes, and some important frame indices.
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+
+ switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
+ RegInfo)) {
+ case SpillArea::FPCXT:
+ FPCXTSaveSize += 4;
+ break;
+ case SpillArea::GPRCS1:
+ GPRCS1Size += 4;
+ break;
+ case SpillArea::GPRCS2:
+ GPRCS2Size += 4;
+ break;
+ case SpillArea::DPRCS1:
+ DPRCSSize += 8;
+ break;
+ case SpillArea::DPRCS2:
+ break;
}
}
@@ -895,7 +935,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
}
- // Determine starting offsets of spill areas.
+ // Determine starting offsets of spill areas. These offsets are all positive
+ // offsets from the bottom of the lowest-addressed callee-save area
+ // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
+ // of the spill area in question.
unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
@@ -915,10 +958,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
int FramePtrOffsetInPush = 0;
if (HasFP) {
+ // Offset from the CFA to the saved frame pointer, will be negative.
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+ LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
+ << ", FPOffset: " << FPOffset << "\n");
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
+ // Offset from the top of the GPRCS1 area to the saved frame pointer, will
+ // be negative.
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
+ LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
+ << ", FramePtrSpillOffset="
+ << (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
+ << "\n");
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -1123,80 +1175,34 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Now that the prologue's actual instructions are finalised, we can insert
// the necessary DWARF cf instructions to describe the situation. Start by
// recording where each register ended up:
- if (GPRCS1Size > 0 && !NeedsWinCFI) {
- MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
- int CFIIndex;
- for (const auto &Entry : CSI) {
+ if (!NeedsWinCFI) {
+ for (const auto &Entry : reverse(CSI)) {
Register Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (PushPopSplit == ARMSubtarget::SplitR7)
- break;
- [[fallthrough]];
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ MachineBasicBlock::iterator CFIPos;
+ switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
+ RegInfo)) {
+ case SpillArea::GPRCS1:
+ CFIPos = std::next(GPRCS1Push);
break;
- }
- }
- }
-
- if (GPRCS2Size > 0 && !NeedsWinCFI) {
- MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
- for (const auto &Entry : CSI) {
- Register Reg = Entry.getReg();
- int FI = Entry.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (PushPopSplit == ARMSubtarget::SplitR7) {
- unsigned DwarfReg = MRI->getDwarfRegNum(
- Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
- int64_t Offset = MFI.getObjectOffset(FI);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
+ case SpillArea::GPRCS2:
+ CFIPos = std::next(GPRCS2Push);
+ break;
+ case SpillArea::DPRCS1:
+ CFIPos = std::next(LastPush);
+ break;
+ case SpillArea::FPCXT:
+ case SpillArea::DPRCS2:
+ // FPCXT and DPRCS2 are not represented in the DWARF info.
break;
}
- }
- }
- if (DPRCSSize > 0 && !NeedsWinCFI) {
- // Since vpush register list cannot have gaps, there may be multiple vpush
- // instructions in the prologue.
- MachineBasicBlock::iterator Pos = std::next(LastPush);
- for (const auto &Entry : CSI) {
- Register Reg = Entry.getReg();
- int FI = Entry.getFrameIdx();
- if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
- (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- int64_t Offset = MFI.getObjectOffset(FI);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ if (CFIPos.isValid()) {
+ int CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr,
+ MRI->getDwarfRegNum(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true),
+ MFI.getObjectOffset(FI)));
+ BuildMI(MBB, CFIPos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
>From 8eac8ccb448789319ed11d8b2f05c0f7b6d41424 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 15:25:39 +0100
Subject: [PATCH 3/9] [ARM] Refactor generation of push/pop instructions (NFC)
These used a set of callback functions to check which callee-save area a
register is in. Refactor them to use the same data as other parts of
ARMFrameLowering. This will make it easier to add a new variant to the
register splitting.
---
llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 74 -----------------
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 99 +++++++++++++----------
llvm/lib/Target/ARM/ARMFrameLowering.h | 7 +-
3 files changed, 60 insertions(+), 120 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 926d702b4092a5..5d465f51ed1d9e 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -41,80 +41,6 @@ namespace ARMRI {
} // end namespace ARMRI
-/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
-/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- case LR: case SP: case PC:
- return true;
- case R8: case R9: case R10: case R11: case R12:
- // For iOS we want r7 and lr to be next to each other.
- return !SplitFramePushPop;
- default:
- return false;
- }
-}
-
-static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R8: case R9: case R10: case R11: case R12:
- // iOS has this second area.
- return SplitFramePushPop;
- default:
- return false;
- }
-}
-
-static inline bool isSplitFPArea1Register(unsigned Reg,
- bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- case R8: case R9: case R10: case R12:
- case SP: case PC:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool isSplitFPArea2Register(unsigned Reg,
- bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R11: case LR:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case D15: case D14: case D13: case D12:
- case D11: case D10: case D9: case D8:
- case D7: case D6: case D5: case D4:
- case D3: case D2: case D1: case D0:
- case D31: case D30: case D29: case D28:
- case D27: case D26: case D25: case D24:
- case D23: case D22: case D21: case D20:
- case D19: case D18: case D17: case D16:
- return true;
- default:
- return false;
- }
-}
static inline bool isCalleeSavedRegister(unsigned Reg,
const MCPhysReg *CSRegs) {
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 6307b4f4b3bb34..d80749601eb231 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1538,14 +1538,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI,
unsigned StmOpc, unsigned StrOpc,
- bool NoGap, bool (*Func)(unsigned, bool),
- unsigned NumAlignedDPRCS2Regs,
- unsigned MIFlags) const {
+ bool NoGap,
+ std::function<bool(unsigned)> Func) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- ARMSubtarget::PushPopSplitVariation PushPopSplit =
- STI.getPushPopSplitVariation(MF);
DebugLoc DL;
@@ -1557,11 +1554,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
Register Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
- continue;
-
- // D-registers in the aligned area DPRCS2 are NOT spilled here.
- if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ if (!Func(Reg))
continue;
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1592,7 +1585,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.size() > 1 || StrOpc== 0) {
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
.addReg(ARM::SP)
- .setMIFlags(MIFlags)
+ .setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
@@ -1600,7 +1593,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
.addReg(Regs[0].first, getKillRegState(Regs[0].second))
.addReg(ARM::SP)
- .setMIFlags(MIFlags)
+ .setMIFlags(MachineInstr::FrameSetup)
.addImm(-4)
.add(predOps(ARMCC::AL));
}
@@ -1619,8 +1612,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MutableArrayRef<CalleeSavedInfo> CSI,
unsigned LdmOpc, unsigned LdrOpc,
bool isVarArg, bool NoGap,
- bool (*Func)(unsigned, bool),
- unsigned NumAlignedDPRCS2Regs) const {
+ std::function<bool(unsigned)> Func) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -1655,12 +1647,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (; i != 0; --i) {
CalleeSavedInfo &Info = CSI[i-1];
Register Reg = Info.getReg();
- if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
+ if (!Func(Reg))
continue;
- // The aligned reloads from area DPRCS2 are not inserted here.
- if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
- continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
@@ -2006,6 +1995,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
ARMSubtarget::PushPopSplitVariation PushPopSplit =
STI.getPushPopSplitVariation(MF);
+ const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
unsigned PushOneOpc = AFI->isThumbFunction() ?
@@ -2027,20 +2017,33 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
.addImm(-4)
.add(predOps(ARMCC::AL));
}
+
+ auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
+ RegInfo](unsigned Reg, SpillArea TestArea) {
+ return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
+ TestArea;
+ };
+ auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
+ return CheckRegArea(Reg, SpillArea::GPRCS1);
+ };
+ auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
+ return CheckRegArea(Reg, SpillArea::GPRCS2);
+ };
+ auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
+ return CheckRegArea(Reg, SpillArea::DPRCS1);
+ };
+
+ // Windows SEH requires the floating-point registers to be pushed between the
+ // two blocks of GPRs in some situations. In all other cases, they are pushed
+ // below the GPRs.
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
- &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
- &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
} else {
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
- 0, MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
- 0, MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
}
// The code above does not insert spill code for the aligned DPRCS2 registers.
@@ -2060,6 +2063,8 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
+
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
ARMSubtarget::PushPopSplitVariation PushPopSplit =
@@ -2074,20 +2079,30 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
unsigned LdrOpc =
AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
+
+ auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
+ RegInfo](unsigned Reg, SpillArea TestArea) {
+ return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
+ TestArea;
+ };
+ auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
+ return CheckRegArea(Reg, SpillArea::GPRCS1);
+ };
+ auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
+ return CheckRegArea(Reg, SpillArea::GPRCS2);
+ };
+ auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
+ return CheckRegArea(Reg, SpillArea::DPRCS1);
+ };
+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isSplitFPArea2Register, 0);
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isSplitFPArea1Register, 0);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
} else {
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register, 0);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register, 0);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
}
return true;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index 6a31b73957f134..0866d2a9c68274 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -90,13 +90,12 @@ class ARMFrameLowering : public TargetFrameLowering {
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc,
- unsigned StrOpc, bool NoGap, bool (*Func)(unsigned, bool),
- unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const;
+ unsigned StrOpc, bool NoGap,
+ std::function<bool(unsigned)> Func) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
- bool (*Func)(unsigned, bool),
- unsigned NumAlignedDPRCS2Regs) const;
+ std::function<bool(unsigned)> Func) const;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF,
>From cdfecdb72dfa340eab1463d35115d07b15532675 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 11:24:30 +0100
Subject: [PATCH 4/9] [ARM] Add debug dump for StackAdjustingInsts (NFC)
---
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index d80749601eb231..8334ea94b85d5f 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -689,6 +689,14 @@ struct StackAdjustingInsts {
MachineBasicBlock::iterator I;
unsigned SPAdjust;
bool BeforeFPSet;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
+ << "sp-adjust=" << SPAdjust;
+ I->dump();
+ }
+#endif
};
SmallVector<InstInfo, 4> Insts;
@@ -723,6 +731,14 @@ struct StackAdjustingInsts {
.setMIFlags(MachineInstr::FrameSetup);
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ dbgs() << "StackAdjustingInsts:\n";
+ for (auto &Info : Insts)
+ Info.dump();
+ }
+#endif
};
} // end anonymous namespace
@@ -842,6 +858,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
ARMSubtarget::PushPopSplitVariation PushPopSplit =
STI.getPushPopSplitVariation(MF);
+ LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
+
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
@@ -1213,8 +1231,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// throughout the process. If we have a frame pointer, it takes over the job
// half-way through, so only the first few .cfi_def_cfa_offset instructions
// actually get emitted.
- if (!NeedsWinCFI)
+ if (!NeedsWinCFI) {
+ LLVM_DEBUG(DefCFAOffsetCandidates.dump());
DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ }
if (STI.isTargetELF() && hasFP(MF))
MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
@@ -1294,6 +1314,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARMSubtarget::PushPopSplitVariation PushPopSplit =
STI.getPushPopSplitVariation(MF);
+ LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
+
// Amount of stack space we reserved next to incoming args for either
// varargs registers or stack arguments in tail calls made by this function.
unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
>From 3ed954d9c60209ebf95ca8d7da387398df6220c0 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 11:34:22 +0100
Subject: [PATCH 5/9] [ARM] Remove always-true checks from Thumb1 frame
lowering (NFC)
For Thumb1, we always split the callee-saved register pushes at R7, so
we don't need to check for this.
---
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index ad39539232a8ca..e7d0a14da6dc9d 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -160,8 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- ARMSubtarget::PushPopSplitVariation PushPopSplit =
- STI.getPushPopSplitVariation(MF);
+ assert(STI.getPushPopSplitVariation(MF) == ARMSubtarget::SplitR7 &&
+ "Must use R7 spilt for Thumb1");
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -223,11 +223,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R8:
case ARM::R9:
case ARM::R10:
- if (PushPopSplit == ARMSubtarget::SplitR7) {
- GPRCS2Size += 4;
- break;
- }
- [[fallthrough]];
+ GPRCS2Size += 4;
+ break;
case ARM::LR:
if (HasFrameRecordArea) {
FRSize += 4;
@@ -367,9 +364,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (PushPopSplit == ARMSubtarget::SplitR7)
- break;
- [[fallthrough]];
+ break;
case ARM::R0:
case ARM::R1:
case ARM::R2:
>From c4812b585e614910eff2715b4a3e14345ef6772e Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 12:13:47 +0100
Subject: [PATCH 6/9] [ARM] Pre-commit test showing frame pointer bug
---
.../CodeGen/Thumb2/pacbti-m-frame-chain.ll | 142 ++++++++++++++++++
1 file changed, 142 insertions(+)
create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
new file mode 100644
index 00000000000000..b08d4e4735ea7f
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s
+
+; int test1() {
+; return 0;
+; }
+; Here, r11 is used as the frame pointer before it is pushed to the stack, so
+; its value isn't preserved.
+define i32 @test1() "sign-return-address"="non-leaf" {
+; CHECK-LABEL: test1:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r11, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r11, r12, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r11, -12
+; CHECK-NEXT: .setfp r11, sp
+; CHECK-NEXT: mov r11, sp
+; CHECK-NEXT: .cfi_def_cfa_register r11
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: pop.w {r11, r12, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
+entry:
+ ret i32 0
+}
+
+; void foo(int n) {
+; int a[n];
+; bar(a);
+; }
+define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" {
+; CHECK-LABEL: test2:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r7, r11, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r7, r11, r12, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r11, -12
+; CHECK-NEXT: .cfi_offset r7, -16
+; CHECK-NEXT: .cfi_offset r4, -20
+; CHECK-NEXT: .setfp r11, sp, #8
+; CHECK-NEXT: add.w r11, sp, #8
+; CHECK-NEXT: .cfi_def_cfa r11, 12
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: movs r1, #7
+; CHECK-NEXT: add.w r0, r1, r0, lsl #2
+; CHECK-NEXT: bic r0, r0, #7
+; CHECK-NEXT: sub.w r0, sp, r0
+; CHECK-NEXT: mov sp, r0
+; CHECK-NEXT: bl take_ptr
+; CHECK-NEXT: sub.w r4, r11, #8
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop.w {r4, r7, r11, r12, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
+entry:
+ %vla = alloca i32, i32 %n, align 4
+ call void @take_ptr(ptr noundef nonnull %vla)
+ ret void
+}
+
+; void test3(int c, float e, int z) {
+; if (c)
+; knr();
+; take_ptr(alloca(z));
+; if (e)
+; knr();
+; }
+define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" {
+; CHECK-LABEL: test3:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r11, r12, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 28
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r11, -12
+; CHECK-NEXT: .cfi_offset r7, -16
+; CHECK-NEXT: .cfi_offset r6, -20
+; CHECK-NEXT: .cfi_offset r5, -24
+; CHECK-NEXT: .cfi_offset r4, -28
+; CHECK-NEXT: .setfp r11, sp, #16
+; CHECK-NEXT: add.w r11, sp, #16
+; CHECK-NEXT: .cfi_def_cfa r11, 12
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r5, r2
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: it ne
+; CHECK-NEXT: blne knr
+; CHECK-NEXT: adds r0, r5, #7
+; CHECK-NEXT: bic r0, r0, #7
+; CHECK-NEXT: sub.w r0, sp, r0
+; CHECK-NEXT: mov sp, r0
+; CHECK-NEXT: bl take_ptr
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: bl __aeabi_fcmpeq
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: bleq knr
+; CHECK-NEXT: sub.w r4, r11, #16
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r11, r12, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
+entry:
+ %tobool.not = icmp eq i32 %c, 0
+ br i1 %tobool.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @knr()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %0 = alloca i8, i32 %z, align 8
+ call void @take_ptr(ptr noundef nonnull %0)
+ %tobool1 = fcmp une float %e, 0.000000e+00
+ br i1 %tobool1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %if.end
+ call void @knr()
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %if.end
+ ret void
+}
+
+declare void @knr(...)
+declare void @take_ptr(ptr noundef)
>From 529235f6722965c4bef2a47f01d370c38d10a7c5 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 25 Sep 2024 18:48:08 +0100
Subject: [PATCH 7/9] [ARM] Fix bugs with AAPCS frame chains and PACBTI
When using AAPCS-compliant frame chains with PACBTI return address
signing, there were a number of bugs in the generation of the frame
pointer and function prologues. The most obvious was that we sometimes
would modify r11 before pushing it to the stack, so it wasn't preserved
as required by the PCS. We also sometimes did not push R11 and LR
adjacent to one another on the stack, or used R11 as a frame pointer
without pointing it at the saved value of R11, both of which are
required to have an AAPCS compliant frame chain.
The original work of this patch was done by James Westwood, reviewed as
#82801 and #81249, with some tidy-ups done by Mark Murray and myself.
---
llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 5 +-
llvm/lib/Target/ARM/ARMCallingConv.td | 19 ++-
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 144 ++++++++++++------
llvm/lib/Target/ARM/ARMSubtarget.cpp | 7 +
llvm/lib/Target/ARM/ARMSubtarget.h | 12 ++
.../CodeGen/Thumb2/pacbti-m-frame-chain.ll | 78 +++++-----
6 files changed, 173 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9bb1e9754217a1..d2e91b02e55b50 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_iOS_SaveList;
if (PushPopSplit == ARMSubtarget::SplitR7)
- return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+ return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
: CSR_ATPCS_SplitPush_SaveList;
+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
+ return CSR_AAPCS_SplitPush_R11_SaveList;
+
return CSR_AAPCS_SaveList;
}
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index d14424c2decac3..27f175a7003366 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;
-// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
-// pointer even for Thumb targets, where split pushes are necessary.
-// This AAPCS alternative makes sure the frame index slots match the push
-// order in that case.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
- R7, R6, R5, R4,
- R10, R9, R8,
- (sequence "D%u", 15, 8))>;
+// Sometimes we need to split the push of the callee-saved GPRs into two
+// regions, to ensure that the frame chain record is set up correctly. These
+// list the callee-saved registers in the order they end up on the stack, which
+// depends on whether the frame pointer is r7 or r11.
+def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+ LR, R11,
+ (sequence "D%u", 15, 8))>;
+def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
+ R7, R6, R5, R4,
+ R10, R9, R8,
+ (sequence "D%u", 15, 8))>;
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 8334ea94b85d5f..3273943db2139d 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
// push {r0-r10, r12} GPRCS1
// vpush {r8-d15} DPRCS1
// push {r11, lr} GPRCS2
+ //
+ // SplitR11AAPCSSignRA:
+ // push {r0-r10, r12} GPRCS1
+ // push {r11, lr} GPRCS2
+ // vpush {d8-d15} DPRCS1
// If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
// the top of the stack frame.
@@ -238,7 +243,8 @@ SpillArea getSpillArea(Register Reg,
return SpillArea::GPRCS1;
case ARM::LR:
- if (Variation == ARMSubtarget::SplitR11WindowsSEH)
+ if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
+ Variation == ARMSubtarget::SplitR11AAPCSSignRA)
return SpillArea::GPRCS2;
else
return SpillArea::GPRCS1;
@@ -827,6 +833,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
int MaxRegBytes = 8 * 4;
+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
+ // Here, r11 can be stored below all of r4-r15.
+ MaxRegBytes = 11 * 4;
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
MaxRegBytes = 11 * 4 + 8 * 8;
@@ -899,17 +908,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
// Determine spill area sizes, and some important frame indices.
+ SpillArea FramePtrSpillArea;
+ bool BeforeFPPush = true;
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
- if (Reg == FramePtr)
+ SpillArea Area = getSpillArea(Reg, PushPopSplit,
+ AFI->getNumAlignedDPRCS2Regs(), RegInfo);
+
+ if (Reg == FramePtr) {
FramePtrSpillFI = FI;
+ FramePtrSpillArea = Area;
+ }
if (Reg == ARM::D8)
D8SpillFI = FI;
- switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
- RegInfo)) {
+ switch (Area) {
case SpillArea::FPCXT:
FPCXTSaveSize += 4;
break;
@@ -936,7 +951,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move past FPCXT area.
if (FPCXTSaveSize > 0) {
LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
+ DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
}
// Allocate the vararg register save area.
@@ -944,13 +959,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
LastPush = std::prev(MBBI);
- DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
+ DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
}
// Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
+ if (FramePtrSpillArea == SpillArea::GPRCS1)
+ BeforeFPPush = false;
}
// Determine starting offsets of spill areas. These offsets are all positive
@@ -974,7 +991,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else {
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
}
- int FramePtrOffsetInPush = 0;
if (HasFP) {
// Offset from the CFA to the saved frame pointer, will be negative.
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
@@ -982,13 +998,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
<< ", FPOffset: " << FPOffset << "\n");
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
- // Offset from the top of the GPRCS1 area to the saved frame pointer, will
- // be negative.
- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
- LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
- << ", FramePtrSpillOffset="
- << (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
- << "\n");
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -1000,7 +1009,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// after DPRCS1.
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
+ if (FramePtrSpillArea == SpillArea::GPRCS2)
+ BeforeFPPush = false;
}
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1013,7 +1024,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
MachineInstr::FrameSetup);
- DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
}
}
@@ -1022,7 +1033,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
- DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
+ DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI), BeforeFPPush);
LastPush = MBBI++;
}
}
@@ -1041,7 +1052,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move GPRCS2, if using using SplitR11WindowsSEH.
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
+ if (FramePtrSpillArea == SpillArea::GPRCS2)
+ BeforeFPPush = false;
}
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1142,28 +1155,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
- // FIXME: The above is not necessary true when PACBTI is enabled.
- // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
- // so FP ends up on area two.
MachineBasicBlock::iterator AfterPush;
if (HasFP) {
- AfterPush = std::next(GPRCS1Push);
- unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
- int FPOffset = PushSize + FramePtrOffsetInPush;
- if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
- AfterPush = std::next(GPRCS2Push);
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
- FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
- } else {
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
- FramePtr, ARM::SP, FPOffset,
- MachineInstr::FrameSetup);
+ MachineBasicBlock::iterator FPPushInst;
+ // Offset from SP immediately after the push which saved the FP to the FP
+ // save slot.
+ int64_t FPOffsetAfterPush;
+ switch (FramePtrSpillArea) {
+ case SpillArea::GPRCS1:
+ FPPushInst = GPRCS1Push;
+ FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
+ ArgRegsSaveSize + FPCXTSaveSize +
+ sizeOfSPAdjustment(*FPPushInst);
+ LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
+ << FPOffsetAfterPush << " after that push\n");
+ break;
+ case SpillArea::GPRCS2:
+ FPPushInst = GPRCS2Push;
+ FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
+ ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
+ sizeOfSPAdjustment(*FPPushInst);
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
+ FPOffsetAfterPush += DPRCSSize + DPRGapSize;
+ LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
+ << FPOffsetAfterPush << " after that push\n");
+ break;
+ default:
+ llvm_unreachable("frame pointer in unknown spill area");
+ break;
}
+ AfterPush = std::next(FPPushInst);
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
+ assert(FPOffsetAfterPush == 0);
+
+ // Emit the MOV or ADD to set up the frame pointer register.
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, FPOffsetAfterPush,
+ MachineInstr::FrameSetup);
+
if (!NeedsWinCFI) {
- if (FramePtrOffsetInPush + PushSize != 0) {
+ // Emit DWARF info to find the CFA using the frame pointer from this
+ // point onward.
+ if (FPOffsetAfterPush != 0) {
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
+ -MFI.getObjectOffset(FramePtrSpillFI)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1675,7 +1711,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
- PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
+ (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
+ PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
@@ -2907,18 +2944,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
if (AFI.shouldSignReturnAddress()) {
// The order of register must match the order we push them, because the
- // PEI assigns frame indices in that order. When compiling for return
- // address sign and authenication, we use split push, therefore the orders
- // we want are:
- // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
- CSI.insert(find_if(CSI,
- [=](const auto &CS) {
- Register Reg = CS.getReg();
- return Reg == ARM::R10 || Reg == ARM::R11 ||
- Reg == ARM::R8 || Reg == ARM::R9 ||
- ARM::DPRRegClass.contains(Reg);
- }),
- CalleeSavedInfo(ARM::R12));
+ // PEI assigns frame indices in that order. That order depends on the
+ // PushPopSplitVariation; only two variations are used with return
+ // address signing:
+ switch (STI.getPushPopSplitVariation(MF)) {
+ case ARMSubtarget::SplitR7:
+ // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
+ CSI.insert(find_if(CSI,
+ [=](const auto &CS) {
+ Register Reg = CS.getReg();
+ return Reg == ARM::R10 || Reg == ARM::R11 ||
+ Reg == ARM::R8 || Reg == ARM::R9 ||
+ ARM::DPRRegClass.contains(Reg);
+ }),
+ CalleeSavedInfo(ARM::R12));
+ break;
+ case ARMSubtarget::SplitR11AAPCSSignRA:
+ // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
+ // on the stack.
+ CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
+ break;
+ default:
+ llvm_unreachable("Unexpected CSR split with return address signing");
+ }
}
return false;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index c4a782bc40910a..9adfb1fab5f084 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
F.needsUnwindTableEntry() &&
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;
+
+ // Returns SplitR11AAPCSSignRA if R11 and LR are not adjacent to each other
+ // in the list of callee-saved registers in a frame, and return address
+ // signing is enabled.
+ if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+ getFramePointerReg() == ARM::R11)
+ return SplitR11AAPCSSignRA;
return NoSplit;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index bea3e1441eaaed..197911a2c0a7ac 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// vpush {d8-d15}
/// push {r11, lr}
SplitR11WindowsSEH,
+
+ /// When generating AAPCS-compliant frame chains, R11 is the frame pointer,
+ /// and must be pushed adjacent to the return address (LR). Normally this
+ /// isn't a problem, because the only register between them is r12, which is
+ /// the intra-procedure-call scratch register, so doesn't need to be saved.
+ /// However, when PACBTI is in use, r12 contains the authentication code, so
+ /// does need to be saved. This means that we need a separate push for R11
+ /// and LR.
+ /// push {r0-r10, r12}
+ /// push {r11, lr}
+ /// vpush {d8-d15}
+ SplitR11AAPCSSignRA,
};
protected:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
index b08d4e4735ea7f..8bcf87130c5400 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
@@ -4,25 +4,27 @@
; int test1() {
; return 0;
; }
-; Here, r11 is used as the frame pointer before it is pushed to the stack, so
-; it's value isn't preserved.
define i32 @test1() "sign-return-address"="non-leaf" {
; CHECK-LABEL: test1:
; CHECK: .cfi_sections .debug_frame
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r11, ra_auth_code, lr}
-; CHECK-NEXT: push.w {r11, r12, lr}
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -4
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push.w {r11, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset lr, -8
; CHECK-NEXT: .cfi_offset r11, -12
; CHECK-NEXT: .setfp r11, sp
; CHECK-NEXT: mov r11, sp
; CHECK-NEXT: .cfi_def_cfa_register r11
; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: pop.w {r11, r12, lr}
+; CHECK-NEXT: pop.w {r11, lr}
+; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -38,17 +40,20 @@ define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" {
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r7, r11, ra_auth_code, lr}
-; CHECK-NEXT: push.w {r4, r7, r11, r12, lr}
+; CHECK-NEXT: .save {r4, r7, ra_auth_code}
+; CHECK-NEXT: push.w {r4, r7, r12}
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset ra_auth_code, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .cfi_offset r4, -12
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push.w {r11, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset ra_auth_code, -8
-; CHECK-NEXT: .cfi_offset r11, -12
-; CHECK-NEXT: .cfi_offset r7, -16
-; CHECK-NEXT: .cfi_offset r4, -20
-; CHECK-NEXT: .setfp r11, sp, #8
-; CHECK-NEXT: add.w r11, sp, #8
-; CHECK-NEXT: .cfi_def_cfa r11, 12
+; CHECK-NEXT: .cfi_offset lr, -16
+; CHECK-NEXT: .cfi_offset r11, -20
+; CHECK-NEXT: .setfp r11, sp
+; CHECK-NEXT: mov r11, sp
+; CHECK-NEXT: .cfi_def_cfa_register r11
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: movs r1, #7
@@ -57,9 +62,9 @@ define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" {
; CHECK-NEXT: sub.w r0, sp, r0
; CHECK-NEXT: mov sp, r0
; CHECK-NEXT: bl take_ptr
-; CHECK-NEXT: sub.w r4, r11, #8
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop.w {r4, r7, r11, r12, lr}
+; CHECK-NEXT: mov sp, r11
+; CHECK-NEXT: pop.w {r11, lr}
+; CHECK-NEXT: pop.w {r4, r7, r12}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -80,19 +85,22 @@ define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-retur
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, r7, r11, ra_auth_code, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r11, r12, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r12}
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset ra_auth_code, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r5, -16
+; CHECK-NEXT: .cfi_offset r4, -20
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push.w {r11, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 28
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset ra_auth_code, -8
-; CHECK-NEXT: .cfi_offset r11, -12
-; CHECK-NEXT: .cfi_offset r7, -16
-; CHECK-NEXT: .cfi_offset r6, -20
-; CHECK-NEXT: .cfi_offset r5, -24
-; CHECK-NEXT: .cfi_offset r4, -28
-; CHECK-NEXT: .setfp r11, sp, #16
-; CHECK-NEXT: add.w r11, sp, #16
-; CHECK-NEXT: .cfi_def_cfa r11, 12
+; CHECK-NEXT: .cfi_offset lr, -24
+; CHECK-NEXT: .cfi_offset r11, -28
+; CHECK-NEXT: .setfp r11, sp
+; CHECK-NEXT: mov r11, sp
+; CHECK-NEXT: .cfi_def_cfa_register r11
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: cmp r0, #0
@@ -111,9 +119,9 @@ define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-retur
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: bleq knr
-; CHECK-NEXT: sub.w r4, r11, #16
-; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r11, r12, lr}
+; CHECK-NEXT: mov sp, r11
+; CHECK-NEXT: pop.w {r11, lr}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
>From 9596ae7161f01108d1ef16a653c0087832d5d57a Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 13:31:38 +0100
Subject: [PATCH 8/9] [ARM] Re-generate PACBTI tests using update_llc_checks.py
---
llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll | 45 +++--
llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll | 148 +++++++++------
.../Thumb2/pacbti-m-indirect-tail-call.ll | 39 ++--
.../CodeGen/Thumb2/pacbti-m-outliner-3.ll | 124 +++++++------
.../CodeGen/Thumb2/pacbti-m-outliner-4.ll | 172 +++++++++++-------
.../test/CodeGen/Thumb2/pacbti-m-overalign.ll | 60 +++---
.../test/CodeGen/Thumb2/pacbti-m-varargs-1.ll | 77 +++++---
.../test/CodeGen/Thumb2/pacbti-m-varargs-2.ll | 79 +++++---
llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll | 113 +++++++++---
9 files changed, 546 insertions(+), 311 deletions(-)
diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
index c309d992b95a5e..c2a2ed2d0c8e8a 100644
--- a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
@@ -1,9 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-unknown-eabi"
+; Check the function starts with `pacbti` and correct unwind info is emitted
define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "branch-target-enforcement" {
+; CHECK-LABEL: _Z1fi:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pacbti r12, lr, sp
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: bl _Z1gi
+; CHECK-NEXT: subs r0, #1
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%add = add nsw i32 %x, 1
%call = tail call i32 @_Z1gi(i32 %add)
@@ -13,24 +40,6 @@ entry:
declare dso_local i32 @_Z1gi(i32)
-; Check the function starts with `pacbti` and correct unwind info is emitted
-; CHECK-LABEL: _Z1fi:
-; ...
-; CHECK: pacbti r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; ...
-
; UNWIND-LABEL: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
index 0ae46cb8879ee0..64c5a6c7030145 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -16,6 +17,31 @@ target triple = "thumbv8m.main-none-none-eabi"
; }
define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf" {
+; CHECK-LABEL: f0:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subs r0, #1
+; CHECK-NEXT: bl g
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%sub = add nsw i32 %x, -1
%call = tail call i32 @g(i32 %sub)
@@ -23,27 +49,32 @@ entry:
ret i32 %add
}
-; CHECK-LABEL: f0:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
+; CHECK-LABEL: f1:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset lr, -8
+; CHECK-NEXT: .cfi_offset r7, -12
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset ra_auth_code, -16
+; CHECK-NEXT: subs r0, #1
+; CHECK-NEXT: bl g
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: clrm {r1, r2, r3, r12, apsr}
+; CHECK-NEXT: bxns lr
entry:
%sub = add nsw i32 %x, -1
%call = tail call i32 @g(i32 %sub)
@@ -51,44 +82,59 @@ entry:
ret i32 %add
}
-; CHECK-LABEL: f1:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 4
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK: vldr fpcxtns, [sp], #4
-; CHECK: aut r12, lr, sp
-
define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
+; CHECK-LABEL: f2:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subs r0, #1
+; CHECK-NEXT: bl g
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: mrs r12, control
+; CHECK-NEXT: tst.w r12, #8
+; CHECK-NEXT: beq .LBB2_2
+; CHECK-NEXT: @ %bb.1: @ %entry
+; CHECK-NEXT: vmrs r12, fpscr
+; CHECK-NEXT: vmov d0, lr, lr
+; CHECK-NEXT: vmov d1, lr, lr
+; CHECK-NEXT: vmov d2, lr, lr
+; CHECK-NEXT: vmov d3, lr, lr
+; CHECK-NEXT: vmov d4, lr, lr
+; CHECK-NEXT: vmov d5, lr, lr
+; CHECK-NEXT: vmov d6, lr, lr
+; CHECK-NEXT: vmov d7, lr, lr
+; CHECK-NEXT: bic r12, r12, #159
+; CHECK-NEXT: bic r12, r12, #4026531840
+; CHECK-NEXT: vmsr fpscr, r12
+; CHECK-NEXT: .LBB2_2: @ %entry
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: mov r2, lr
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: mov r12, lr
+; CHECK-NEXT: msr apsr_nzcvq, lr
+; CHECK-NEXT: bxns lr
entry:
%sub = add nsw i32 %x, -1
%call = tail call i32 @g(i32 %sub)
%add = add nsw i32 %call, 1
ret i32 %add
}
-; CHECK-LABEL: f2:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: mrs r12, control
-; ...
-; CHECK: bxns lr
declare dso_local i32 @g(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
index 4dfac252e2314c..9a8bba47f33ad6 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
@@ -1,11 +1,33 @@
-; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK1
-; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-unknown-eabi"
@p = hidden local_unnamed_addr global ptr null, align 4
define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: mov r7, r3
+; CHECK-NEXT: mov r5, r2
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: bl g
+; CHECK-NEXT: movw r1, :lower16:p
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: movt r1, :upper16:p
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: ldr r4, [r1]
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: blx r4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%call = tail call i32 @g(i32 %a) #0
%0 = load ptr, ptr @p, align 4
@@ -13,19 +35,6 @@ entry:
ret i32 %call1
}
-; CHECK1-LABEL: f
-; ...
-; CHECK1: aut r12, lr, sp
-; CHECK1-NOT: bx r12
-
-; CHECK2-LABEL: f
-; ...
-; CHECK2: blx r4
-; CHECK2-NEXT: ldr r12, [sp], #4
-; CHECK2-NEXT: pop.w {r4, r5, r6, r7, lr}
-; CHECK2-NEXT: aut r12, lr, sp
-; CHECK2-NEXT: bx lr
-
declare dso_local i32 @g(i32) local_unnamed_addr #0
attributes #0 = { nounwind "sign-return-address"="non-leaf"}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
index 1b13e06546f152..ad94b7be8b2a60 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -21,12 +22,50 @@ target triple = "thumbv7m-arm-none-eabi"
; }
define hidden i32 @h(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: h:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: bx lr
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}
define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r6, -8
+; CHECK-NEXT: .cfi_offset r5, -12
+; CHECK-NEXT: .cfi_offset r4, -16
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bmi .LBB1_2
+; CHECK-NEXT: @ %bb.1: @ %if.end
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: adds r0, #2
+; CHECK-NEXT: b .LBB1_3
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: .LBB1_3: @ %return
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%cmp = icmp slt i32 %a, 0
br i1 %cmp, label %return, label %if.end
@@ -48,34 +87,39 @@ return: ; preds = %entry, %if.end
ret i32 %retval.0
}
-; CHECK-LABEL: f:
-; ...
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; ...
-; CHECK: bl OUTLINED_FUNCTION_0
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
-
define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: g:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r6, -8
+; CHECK-NEXT: .cfi_offset r5, -12
+; CHECK-NEXT: .cfi_offset r4, -16
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bmi .LBB2_2
+; CHECK-NEXT: @ %bb.1: @ %if.end
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: b .LBB2_3
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: .LBB2_3: @ %return
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%cmp = icmp slt i32 %a, 0
br i1 %cmp, label %return, label %if.end
@@ -96,30 +140,6 @@ return: ; preds = %entry, %if.end
%retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ]
ret i32 %retval.0
}
-; CHECK-LABEL: g:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; ...
-; CHECK: bl OUTLINED_FUNCTION_0
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
; CHECK-LABEL: OUTLINED_FUNCTION_0:
; CHECK: pac r12, lr, sp
@@ -158,7 +178,7 @@ attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable "
; UNWIND-LABEL: FunctionAddress: 0x5C
; UNWIND: 0xB4 ; pop ra_auth_code
; UNWIND: 0x84 0x00 ; pop {lr}
-
+
; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0
; UNWIND-LABEL: 00000005 {{.*}} f
; UNWIND-LABEL: 00000031 {{.*}} g
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
index 38c23977b623f9..c0b45c0f90eb25 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -27,6 +28,44 @@ target triple = "thumbv7m-arm-none-eabi"
@_ZTIi = external dso_local constant ptr
define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1hii:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: cmp.w r0, #-1
+; CHECK-NEXT: ble .LBB0_2
+; CHECK-NEXT: @ %bb.1: @ %if.end
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .LBB0_2: @ %if.then
+; CHECK-NEXT: movs r0, #4
+; CHECK-NEXT: bl __cxa_allocate_exception
+; CHECK-NEXT: movs r1, #1
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: str r1, [r0]
+; CHECK-NEXT: ldr r1, .LCPI0_0
+; CHECK-NEXT: bl __cxa_throw
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.3:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long _ZTIi
entry:
%cmp = icmp slt i32 %a, 0
br i1 %cmp, label %if.then, label %if.end
@@ -42,31 +81,47 @@ if.end: ; preds = %entry
ret i32 %add
}
-; CHECK-LABEL: _Z1hii:
-; ...
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; ...
-; CHECK-NOT: pac
-; CHECK: aut
-; CHECK: .cfi_endproc
-
declare dso_local ptr @__cxa_allocate_exception(i32) local_unnamed_addr
declare dso_local void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr
define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiiii:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r6, -8
+; CHECK-NEXT: .cfi_offset r5, -12
+; CHECK-NEXT: .cfi_offset r4, -16
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bmi .LBB1_2
+; CHECK-NEXT: @ %bb.1: @ %if.end
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: adds r1, r0, r6
+; CHECK-NEXT: muls r0, r1, r0
+; CHECK-NEXT: adds r1, r4, r5
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: adds r0, #2
+; CHECK-NEXT: b .LBB1_3
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: .LBB1_3: @ %return
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%cmp = icmp slt i32 %a, 0
br i1 %cmp, label %return, label %if.end
@@ -85,35 +140,43 @@ return: ; preds = %entry, %if.end
ret i32 %retval.0
}
-; CHECK-LABEL: _Z1fiiii:
-; ...
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
+define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1giiii:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r6, -8
; CHECK-NEXT: .cfi_offset r5, -12
; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 20
; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 24
-; ...
-; CHECK: bl OUTLINED_FUNCTION_0
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
-
-
-define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: bmi .LBB2_2
+; CHECK-NEXT: @ %bb.1: @ %if.end
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: adds r1, r0, r6
+; CHECK-NEXT: muls r0, r1, r0
+; CHECK-NEXT: adds r1, r4, r5
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: b .LBB2_3
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: .LBB2_3: @ %return
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%cmp = icmp slt i32 %a, 0
br i1 %cmp, label %return, label %if.end
@@ -132,33 +195,6 @@ return: ; preds = %entry, %if.end
ret i32 %retval.0
}
-; CHECK-LABEL: _Z1giiii:
-; ...
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; ...
-; CHECK: bl OUTLINED_FUNCTION_0
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
-
; CHEK-LABEL: OUTLINED_FUNCTION_0:
; CHECK-NOT: pac
; CHECK-NOT: aut
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
index 5dce6752c065e1..012120d976810b 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -15,6 +16,42 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
; }
define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
+; CHECK-LABEL: _Z1fv:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r6, r7, lr}
+; CHECK-NEXT: push {r4, r6, r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r4, -16
+; CHECK-NEXT: .setfp r7, sp, #8
+; CHECK-NEXT: add r7, sp, #8
+; CHECK-NEXT: .cfi_def_cfa r7, 8
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .pad #44
+; CHECK-NEXT: sub sp, #44
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #5
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: movs r0, #4
+; CHECK-NEXT: bl _Z1giPi
+; CHECK-NEXT: ldm.w sp, {r0, r1, r2, r3}
+; CHECK-NEXT: sub.w r4, r7, #12
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: add r0, r2
+; CHECK-NEXT: add r0, r3
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r6, r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%a = alloca [4 x i32], align 32
%call = call i32 @_Z1giPi(i32 4, ptr nonnull %a)
@@ -31,29 +68,6 @@ entry:
ret i32 %add.3
}
-; CHECK-LABEL: _Z1fv:
-; CHECK: pac r12, lr, sp
-; CHECK: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .setfp r7, sp, #8
-; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad #44
-; CHECK-NEXT: sub sp, #44
-; CHECK: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r6, r7, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
-
declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr
!llvm.module.flags = !{!0, !1, !2}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
index d027c9e8c7b548..63adc78fe849c3 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -5,6 +6,54 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
%"struct.std::__va_list" = type { ptr }
define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiz:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .pad #12
+; CHECK-NEXT: sub sp, #12
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset lr, -16
+; CHECK-NEXT: .cfi_offset r7, -20
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset ra_auth_code, -24
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .cfi_def_cfa_offset 28
+; CHECK-NEXT: add.w r12, sp, #16
+; CHECK-NEXT: cmp r0, #1
+; CHECK-NEXT: stm.w r12, {r1, r2, r3}
+; CHECK-NEXT: add r1, sp, #16
+; CHECK-NEXT: str r1, [sp]
+; CHECK-NEXT: blt .LBB0_3
+; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
+; CHECK-NEXT: ldr r1, [sp]
+; CHECK-NEXT: dls lr, r0
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: .LBB0_2: @ %for.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: str r1, [sp]
+; CHECK-NEXT: ldr r2, [r1, #-4]
+; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: add r0, r2
+; CHECK-NEXT: le lr, .LBB0_2
+; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: .LBB0_4: @ %for.cond.cleanup
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: add sp, #12
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%ap = alloca %"struct.std::__va_list", align 4
call void @llvm.va_start(ptr nonnull %ap)
@@ -33,34 +82,6 @@ for.body: ; preds = %for.body.lr.ph, %fo
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-; CHECK-LABEL: _Z1fiz:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .pad #12
-; CHECK-NEXT: sub sp, #12
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 28
-; ...
-; CHECK: add.w r[[N:[0-9]*]], sp, #16
-; CHECK: stm.w r[[N]], {r1, r2, r3}
-; ...
-; CHECK: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
-; CHECK-NEXT: add sp, #12
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
declare void @llvm.va_start(ptr) #1
declare void @llvm.va_end(ptr) #1
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
index 8019cd5b6109eb..38b5b7a16e01bd 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -19,6 +20,54 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
%"struct.std::__va_list" = type { ptr }
define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiz:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .pad #12
+; CHECK-NEXT: sub sp, #12
+; CHECK-NEXT: .cfi_def_cfa_offset 12
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 28
+; CHECK-NEXT: .cfi_offset lr, -16
+; CHECK-NEXT: .cfi_offset r7, -20
+; CHECK-NEXT: .cfi_offset r5, -24
+; CHECK-NEXT: .cfi_offset r4, -28
+; CHECK-NEXT: .save {ra_auth_code}
+; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset ra_auth_code, -32
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: add r0, sp, #28
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: cmp r4, #1
+; CHECK-NEXT: stm r0!, {r1, r2, r3}
+; CHECK-NEXT: add r0, sp, #28
+; CHECK-NEXT: str r0, [sp, #4]
+; CHECK-NEXT: blt .LBB0_2
+; CHECK-NEXT: .LBB0_1: @ %for.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: adds r1, r0, #4
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: ldr r0, [r0]
+; CHECK-NEXT: bl _Z1gi
+; CHECK-NEXT: add r5, r0
+; CHECK-NEXT: subs r4, #1
+; CHECK-NEXT: bne .LBB0_1
+; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: ldr r12, [sp], #4
+; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: add sp, #12
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%ap = alloca %"struct.std::__va_list", align 4
call void @llvm.va_start(ptr nonnull %ap)
@@ -47,36 +96,6 @@ for.body: ; preds = %for.body.lr.ph, %fo
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-; CHECK-LABEL: _Z1fiz:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .pad #12
-; CHECK-NEXT: sub sp, #12
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 28
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .cfi_offset r5, -24
-; CHECK-NEXT: .cfi_offset r4, -28
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset ra_auth_code, -32
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; ...
-; CHECK: add r[[N:[0-9]*]], sp, #28
-; CHECK: stm r[[N]]!, {r1, r2, r3}
-; ...
-; CHECK: add sp, #8
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r7, lr}
-; CHECK-NEXT: add sp, #12
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
declare void @llvm.va_start(ptr) #1
declare void @llvm.va_end(ptr) #1
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index c1d17a7587be05..ccab35b7331141 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -14,6 +15,92 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
; }
define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK: .cfi_sections .debug_frame
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: pac r12, lr, sp
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset r7, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r5, -16
+; CHECK-NEXT: .cfi_offset r4, -20
+; CHECK-NEXT: .setfp r7, sp, #12
+; CHECK-NEXT: add r7, sp, #12
+; CHECK-NEXT: .cfi_def_cfa r7, 8
+; CHECK-NEXT: .save {r8, r9, ra_auth_code}
+; CHECK-NEXT: push.w {r8, r9, r12}
+; CHECK-NEXT: .cfi_offset ra_auth_code, -24
+; CHECK-NEXT: .cfi_offset r9, -28
+; CHECK-NEXT: .cfi_offset r8, -32
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: movs r0, #7
+; CHECK-NEXT: add.w r0, r0, r5, lsl #2
+; CHECK-NEXT: bic r0, r0, #7
+; CHECK-NEXT: sub.w r4, sp, r0
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl g
+; CHECK-NEXT: cmp r5, #1
+; CHECK-NEXT: blt .LBB0_3
+; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT: subs r0, r5, #1
+; CHECK-NEXT: and r12, r5, #3
+; CHECK-NEXT: cmp r0, #3
+; CHECK-NEXT: bhs .LBB0_4
+; CHECK-NEXT: @ %bb.2:
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: b .LBB0_9
+; CHECK-NEXT: .LBB0_4: @ %for.body.preheader.new
+; CHECK-NEXT: bic r0, r5, #3
+; CHECK-NEXT: movs r2, #1
+; CHECK-NEXT: subs r0, #4
+; CHECK-NEXT: sub.w r3, r4, #16
+; CHECK-NEXT: add.w lr, r2, r0, lsr #2
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: .LBB0_5: @ %for.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr r5, [r3, #16]!
+; CHECK-NEXT: adds r2, #4
+; CHECK-NEXT: add r0, r5
+; CHECK-NEXT: ldrd r5, r1, [r3, #4]
+; CHECK-NEXT: ldr r6, [r3, #12]
+; CHECK-NEXT: add r0, r5
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: add r0, r6
+; CHECK-NEXT: le lr, .LBB0_5
+; CHECK-NEXT: .LBB0_6: @ %for.cond.cleanup.loopexit.unr-lcssa
+; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: beq .LBB0_9
+; CHECK-NEXT: @ %bb.7: @ %for.body.epil
+; CHECK-NEXT: ldr.w r3, [r4, r2, lsl #2]
+; CHECK-NEXT: cmp.w r12, #1
+; CHECK-NEXT: add r0, r3
+; CHECK-NEXT: beq .LBB0_9
+; CHECK-NEXT: @ %bb.8: @ %for.body.epil.1
+; CHECK-NEXT: add.w r2, r4, r2, lsl #2
+; CHECK-NEXT: cmp.w r12, #2
+; CHECK-NEXT: ldr r1, [r2, #4]
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: ldrne r1, [r2, #8]
+; CHECK-NEXT: addne r0, r1
+; CHECK-NEXT: .LBB0_9: @ %for.cond.cleanup
+; CHECK-NEXT: sub.w r4, r7, #24
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop.w {r8, r9, r12}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: aut r12, lr, sp
+; CHECK-NEXT: bx lr
entry:
%vla = alloca i32, i32 %n, align 4
%call = call i32 @g(i32 %n, ptr nonnull %vla) #0
@@ -88,32 +175,6 @@ for.body.epil.2: ; preds = %for.body.epil.1
br label %for.cond.cleanup
}
-; CHECK-LABEL: f:
-; CHECK: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r5, -16
-; CHECK-NEXT: .cfi_offset r4, -20
-; CHECK-NEXT: .setfp r7, sp, #12
-; CHECK-NEXT: add r7, sp, #12
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save {r8, r9, ra_auth_code}
-; CHECK-NEXT: push.w {r8, r9, r12}
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .cfi_offset r9, -28
-; CHECK-NEXT: .cfi_offset r8, -32
-; ...
-; CHECK: sub.w r[[N:[0-9]*]], r7, #24
-; CHECK-NEXT: mov sp, r[[N]]
-; CHECK-NEXT: pop.w {r8, r9, r12}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
-
declare dso_local i32 @g(i32, ptr) local_unnamed_addr #0
attributes #0 = { nounwind "sign-return-address"="non-leaf"}
>From ba1490891f4988933ef905f9c06633dce5bdbce0 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 13:48:55 +0100
Subject: [PATCH 9/9] [ARM] Optimise non-ABI frame pointers
With -fomit-frame-pointer, even if we set up a frame pointer for other
reasons (e.g. variable-sized or over-aligned stack allocations), we
don't need to create an ABI-compliant frame record. This means that we
can save all of the general-purpose registers in one push, instead of
splitting it to ensure that the frame pointer and link register are
adjacent on the stack, saving two instructions per function.
---
llvm/lib/Target/ARM/ARMFrameLowering.cpp | 11 +
llvm/lib/Target/ARM/ARMSubtarget.cpp | 30 +-
llvm/lib/Target/ARM/ARMSubtarget.h | 4 +
llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll | 22 +-
llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll | 72 ++-
.../CodeGen/Thumb2/pacbti-m-frame-chain.ll | 434 ++++++++++++++----
.../Thumb2/pacbti-m-indirect-tail-call.ll | 9 +-
.../CodeGen/Thumb2/pacbti-m-outliner-3.ll | 62 +--
.../CodeGen/Thumb2/pacbti-m-outliner-4.ll | 98 ++--
.../test/CodeGen/Thumb2/pacbti-m-overalign.ll | 30 +-
.../test/CodeGen/Thumb2/pacbti-m-stack-arg.ll | 9 +-
.../test/CodeGen/Thumb2/pacbti-m-varargs-1.ll | 16 +-
.../test/CodeGen/Thumb2/pacbti-m-varargs-2.ll | 30 +-
llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll | 29 +-
14 files changed, 523 insertions(+), 333 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 3273943db2139d..e02bb170fd3f54 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2964,6 +2964,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
// on the stack.
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
break;
+ case ARMSubtarget::NoSplit:
+ assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ "ABI-required frame pointers need a CSR split when signing return "
+ "address.");
+ CSI.insert(find_if(CSI,
+ [=](const auto &CS) {
+ Register Reg = CS.getReg();
+ return Reg != ARM::LR;
+ }),
+ CalleeSavedInfo(ARM::R12));
+ break;
default:
llvm_unreachable("Unexpected CSR split with return address signing");
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 9adfb1fab5f084..e3978232540b8c 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
const std::vector<CalleeSavedInfo> CSI =
MF.getFrameInfo().getCalleeSavedInfo();
- // Returns SplitR7 if the frame setup must be split into two separate pushes
- // of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
- // always required on Thumb1-only targets, as the push and pop instructions
- // can't access the high registers. This is also required when R7 is the frame
- // pointer and frame pointer elimiination is disabled, or branch signing is
- // enabled and AAPCS is disabled.
- if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
- !createAAPCSFrameChain()) ||
- (getFramePointerReg() == ARM::R7 &&
- MF.getTarget().Options.DisableFramePointerElim(MF)) ||
- isThumb1Only())
+ // Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
+ // cannot use high registers except for lr.
+ if (isThumb1Only())
+ return SplitR7;
+
+ // If R7 is the frame pointer, we must split at R7 to ensure that the
+ // previous frame pointer (R7) and return address (LR) are adjacent on the
+ // stack, to form a valid frame record.
+ if (getFramePointerReg() == ARM::R7 &&
+ MF.getTarget().Options.DisableFramePointerElim(MF))
return SplitR7;
// Returns SplitR11WindowsSEH when the stack pointer needs to be
@@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;
- // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
- // other in the list of callee saved registers in a frame, and branch
- // signing is enabled.
+ // Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
+ // and LR to be adjacent on the stack, and branch signing is enabled,
+ // requiring R12 to be on the stack.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
- getFramePointerReg() == ARM::R11)
+ getFramePointerReg() == ARM::R11 &&
+ MF.getTarget().Options.DisableFramePointerElim(MF))
return SplitR11AAPCSSignRA;
return NoSplit;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 197911a2c0a7ac..238a8fcb70cbea 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// push {r0-r7, lr}
/// push {r8-r12}
/// vpush {d8-d15}
+ /// Note that Thumb1 changes this layout when the frame pointer is R11,
+ /// using a longer sequence of instructions because R11 can't be used by a
+ /// Thumb1 push instruction. This doesn't currently have a separate enum
+ /// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
SplitR7,
/// When the stack frame size if now known (because of variable-sized
diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
index c2a2ed2d0c8e8a..a0e6f9bf9b30d9 100644
--- a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
@@ -11,15 +11,12 @@ define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-a
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pacbti r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -27,8 +24,7 @@ define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-a
; CHECK-NEXT: bl _Z1gi
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -42,6 +38,8 @@ declare dso_local i32 @_Z1gi(i32)
; UNWIND-LABEL: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT: 0xB0 ; finish
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
+
+
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
index 64c5a6c7030145..31f8ecddcb986c 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
@@ -22,15 +22,12 @@ define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -38,8 +35,7 @@ define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -56,20 +52,16 @@ define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 4
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset lr, -8
-; CHECK-NEXT: .cfi_offset r7, -12
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset ra_auth_code, -16
+; CHECK-NEXT: .cfi_offset lr, -8
+; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .cfi_offset r7, -16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
; CHECK-NEXT: vldr fpcxtns, [sp], #4
; CHECK-NEXT: aut r12, lr, sp
@@ -87,15 +79,12 @@ define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -103,8 +92,7 @@ define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: mrs r12, control
; CHECK-NEXT: tst.w r12, #8
@@ -149,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe
; UNWIND-LABEL: FunctionAddress: 0x0
; UNWIND: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT: 0xB0 ; finish
-; UNWIND-NEXT: 0xB0 ; finish
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
+
-; UNWIND-LABEL: FunctionAddress: 0x24
-; UNWIND: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
+; UNWIND-LABEL: FunctionAddress: 0x1E
+; UNWIND: 0x80 0x08 ; pop {r7}
+; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: FunctionAddress: 0x54
+; UNWIND-LABEL: FunctionAddress: 0x48
; UNWIND: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT: 0xB0 ; finish
-; UNWIND-NEXT: 0xB0 ; finish
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
; UNWIND-LABEL: 00000001 {{.*}} f0
-; UNWIND-LABEL: 00000025 {{.*}} f1
-; UNWIND-LABEL: 00000055 {{.*}} f2
+; UNWIND-LABEL: 0000001f {{.*}} f1
+; UNWIND-LABEL: 00000049 {{.*}} f2
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
index 8bcf87130c5400..e9c7f2236c0ffc 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
@@ -1,32 +1,56 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none | FileCheck %s --check-prefix=R7
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all | FileCheck %s --check-prefix=R7-ABI
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11-ABI
; int test1() {
; return 0;
; }
define i32 @test1() "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test1:
-; CHECK: .cfi_sections .debug_frame
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: @ %bb.0: @ %entry
-; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 4
-; CHECK-NEXT: .cfi_offset ra_auth_code, -4
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push.w {r11, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset lr, -8
-; CHECK-NEXT: .cfi_offset r11, -12
-; CHECK-NEXT: .setfp r11, sp
-; CHECK-NEXT: mov r11, sp
-; CHECK-NEXT: .cfi_def_cfa_register r11
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: pop.w {r11, lr}
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
+; R7-LABEL: test1:
+; R7: .cfi_sections .debug_frame
+; R7-NEXT: .cfi_startproc
+; R7-NEXT: @ %bb.0: @ %entry
+; R7-NEXT: movs r0, #0
+; R7-NEXT: bx lr
+;
+; R7-ABI-LABEL: test1:
+; R7-ABI: .cfi_sections .debug_frame
+; R7-ABI-NEXT: .cfi_startproc
+; R7-ABI-NEXT: @ %bb.0: @ %entry
+; R7-ABI-NEXT: movs r0, #0
+; R7-ABI-NEXT: bx lr
+;
+; R11-LABEL: test1:
+; R11: .cfi_sections .debug_frame
+; R11-NEXT: .cfi_startproc
+; R11-NEXT: @ %bb.0: @ %entry
+; R11-NEXT: movs r0, #0
+; R11-NEXT: bx lr
+;
+; R11-ABI-LABEL: test1:
+; R11-ABI: .cfi_sections .debug_frame
+; R11-ABI-NEXT: .cfi_startproc
+; R11-ABI-NEXT: @ %bb.0: @ %entry
+; R11-ABI-NEXT: pac r12, lr, sp
+; R11-ABI-NEXT: .save {ra_auth_code}
+; R11-ABI-NEXT: str r12, [sp, #-4]!
+; R11-ABI-NEXT: .cfi_def_cfa_offset 4
+; R11-ABI-NEXT: .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT: .save {r11, lr}
+; R11-ABI-NEXT: push.w {r11, lr}
+; R11-ABI-NEXT: .cfi_def_cfa_offset 12
+; R11-ABI-NEXT: .cfi_offset lr, -8
+; R11-ABI-NEXT: .cfi_offset r11, -12
+; R11-ABI-NEXT: .setfp r11, sp
+; R11-ABI-NEXT: mov r11, sp
+; R11-ABI-NEXT: .cfi_def_cfa_register r11
+; R11-ABI-NEXT: movs r0, #0
+; R11-ABI-NEXT: pop.w {r11, lr}
+; R11-ABI-NEXT: ldr r12, [sp], #4
+; R11-ABI-NEXT: aut r12, lr, sp
+; R11-ABI-NEXT: bx lr
entry:
ret i32 0
}
@@ -36,37 +60,127 @@ entry:
; bar(a);
; }
define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test2:
-; CHECK: .cfi_startproc
-; CHECK-NEXT: @ %bb.0: @ %entry
-; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r7, ra_auth_code}
-; CHECK-NEXT: push.w {r4, r7, r12}
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r4, -12
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push.w {r11, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r11, -20
-; CHECK-NEXT: .setfp r11, sp
-; CHECK-NEXT: mov r11, sp
-; CHECK-NEXT: .cfi_def_cfa_register r11
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: movs r1, #7
-; CHECK-NEXT: add.w r0, r1, r0, lsl #2
-; CHECK-NEXT: bic r0, r0, #7
-; CHECK-NEXT: sub.w r0, sp, r0
-; CHECK-NEXT: mov sp, r0
-; CHECK-NEXT: bl take_ptr
-; CHECK-NEXT: mov sp, r11
-; CHECK-NEXT: pop.w {r11, lr}
-; CHECK-NEXT: pop.w {r4, r7, r12}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
+; R7-LABEL: test2:
+; R7: .cfi_startproc
+; R7-NEXT: @ %bb.0: @ %entry
+; R7-NEXT: pac r12, lr, sp
+; R7-NEXT: .save {r4, r6, r7, ra_auth_code, lr}
+; R7-NEXT: push.w {r4, r6, r7, r12, lr}
+; R7-NEXT: .cfi_def_cfa_offset 20
+; R7-NEXT: .cfi_offset lr, -4
+; R7-NEXT: .cfi_offset ra_auth_code, -8
+; R7-NEXT: .cfi_offset r7, -12
+; R7-NEXT: .cfi_offset r6, -16
+; R7-NEXT: .cfi_offset r4, -20
+; R7-NEXT: .setfp r7, sp, #8
+; R7-NEXT: add r7, sp, #8
+; R7-NEXT: .cfi_def_cfa r7, 12
+; R7-NEXT: .pad #4
+; R7-NEXT: sub sp, #4
+; R7-NEXT: movs r1, #7
+; R7-NEXT: add.w r0, r1, r0, lsl #2
+; R7-NEXT: bic r0, r0, #7
+; R7-NEXT: sub.w r0, sp, r0
+; R7-NEXT: mov sp, r0
+; R7-NEXT: bl take_ptr
+; R7-NEXT: sub.w r4, r7, #8
+; R7-NEXT: mov sp, r4
+; R7-NEXT: pop.w {r4, r6, r7, r12, lr}
+; R7-NEXT: aut r12, lr, sp
+; R7-NEXT: bx lr
+;
+; R7-ABI-LABEL: test2:
+; R7-ABI: .cfi_startproc
+; R7-ABI-NEXT: @ %bb.0: @ %entry
+; R7-ABI-NEXT: pac r12, lr, sp
+; R7-ABI-NEXT: .save {r4, r6, r7, lr}
+; R7-ABI-NEXT: push {r4, r6, r7, lr}
+; R7-ABI-NEXT: .cfi_def_cfa_offset 16
+; R7-ABI-NEXT: .cfi_offset lr, -4
+; R7-ABI-NEXT: .cfi_offset r7, -8
+; R7-ABI-NEXT: .cfi_offset r6, -12
+; R7-ABI-NEXT: .cfi_offset r4, -16
+; R7-ABI-NEXT: .setfp r7, sp, #8
+; R7-ABI-NEXT: add r7, sp, #8
+; R7-ABI-NEXT: .cfi_def_cfa r7, 8
+; R7-ABI-NEXT: .save {ra_auth_code}
+; R7-ABI-NEXT: str r12, [sp, #-4]!
+; R7-ABI-NEXT: .cfi_offset ra_auth_code, -20
+; R7-ABI-NEXT: .pad #4
+; R7-ABI-NEXT: sub sp, #4
+; R7-ABI-NEXT: movs r1, #7
+; R7-ABI-NEXT: add.w r0, r1, r0, lsl #2
+; R7-ABI-NEXT: bic r0, r0, #7
+; R7-ABI-NEXT: sub.w r0, sp, r0
+; R7-ABI-NEXT: mov sp, r0
+; R7-ABI-NEXT: bl take_ptr
+; R7-ABI-NEXT: sub.w r4, r7, #12
+; R7-ABI-NEXT: mov sp, r4
+; R7-ABI-NEXT: ldr r12, [sp], #4
+; R7-ABI-NEXT: pop.w {r4, r6, r7, lr}
+; R7-ABI-NEXT: aut r12, lr, sp
+; R7-ABI-NEXT: bx lr
+;
+; R11-LABEL: test2:
+; R11: .cfi_startproc
+; R11-NEXT: @ %bb.0: @ %entry
+; R11-NEXT: pac r12, lr, sp
+; R11-NEXT: .save {r4, r7, r11, ra_auth_code, lr}
+; R11-NEXT: push.w {r4, r7, r11, r12, lr}
+; R11-NEXT: .cfi_def_cfa_offset 20
+; R11-NEXT: .cfi_offset lr, -4
+; R11-NEXT: .cfi_offset ra_auth_code, -8
+; R11-NEXT: .cfi_offset r11, -12
+; R11-NEXT: .cfi_offset r7, -16
+; R11-NEXT: .cfi_offset r4, -20
+; R11-NEXT: .setfp r11, sp, #8
+; R11-NEXT: add.w r11, sp, #8
+; R11-NEXT: .cfi_def_cfa r11, 12
+; R11-NEXT: .pad #4
+; R11-NEXT: sub sp, #4
+; R11-NEXT: movs r1, #7
+; R11-NEXT: add.w r0, r1, r0, lsl #2
+; R11-NEXT: bic r0, r0, #7
+; R11-NEXT: sub.w r0, sp, r0
+; R11-NEXT: mov sp, r0
+; R11-NEXT: bl take_ptr
+; R11-NEXT: sub.w r4, r11, #8
+; R11-NEXT: mov sp, r4
+; R11-NEXT: pop.w {r4, r7, r11, r12, lr}
+; R11-NEXT: aut r12, lr, sp
+; R11-NEXT: bx lr
+;
+; R11-ABI-LABEL: test2:
+; R11-ABI: .cfi_startproc
+; R11-ABI-NEXT: @ %bb.0: @ %entry
+; R11-ABI-NEXT: pac r12, lr, sp
+; R11-ABI-NEXT: .save {r4, r7, ra_auth_code}
+; R11-ABI-NEXT: push.w {r4, r7, r12}
+; R11-ABI-NEXT: .cfi_def_cfa_offset 12
+; R11-ABI-NEXT: .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT: .cfi_offset r7, -8
+; R11-ABI-NEXT: .cfi_offset r4, -12
+; R11-ABI-NEXT: .save {r11, lr}
+; R11-ABI-NEXT: push.w {r11, lr}
+; R11-ABI-NEXT: .cfi_def_cfa_offset 20
+; R11-ABI-NEXT: .cfi_offset lr, -16
+; R11-ABI-NEXT: .cfi_offset r11, -20
+; R11-ABI-NEXT: .setfp r11, sp
+; R11-ABI-NEXT: mov r11, sp
+; R11-ABI-NEXT: .cfi_def_cfa_register r11
+; R11-ABI-NEXT: .pad #4
+; R11-ABI-NEXT: sub sp, #4
+; R11-ABI-NEXT: movs r1, #7
+; R11-ABI-NEXT: add.w r0, r1, r0, lsl #2
+; R11-ABI-NEXT: bic r0, r0, #7
+; R11-ABI-NEXT: sub.w r0, sp, r0
+; R11-ABI-NEXT: mov sp, r0
+; R11-ABI-NEXT: bl take_ptr
+; R11-ABI-NEXT: mov sp, r11
+; R11-ABI-NEXT: pop.w {r11, lr}
+; R11-ABI-NEXT: pop.w {r4, r7, r12}
+; R11-ABI-NEXT: aut r12, lr, sp
+; R11-ABI-NEXT: bx lr
entry:
%vla = alloca i32, i32 %n, align 4
call void @take_ptr(ptr noundef nonnull %vla)
@@ -81,49 +195,175 @@ entry:
; knr();
; }
define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test3:
-; CHECK: .cfi_startproc
-; CHECK-NEXT: @ %bb.0: @ %entry
-; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r12}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r5, -16
-; CHECK-NEXT: .cfi_offset r4, -20
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push.w {r11, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 28
-; CHECK-NEXT: .cfi_offset lr, -24
-; CHECK-NEXT: .cfi_offset r11, -28
-; CHECK-NEXT: .setfp r11, sp
-; CHECK-NEXT: mov r11, sp
-; CHECK-NEXT: .cfi_def_cfa_register r11
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: it ne
-; CHECK-NEXT: blne knr
-; CHECK-NEXT: adds r0, r5, #7
-; CHECK-NEXT: bic r0, r0, #7
-; CHECK-NEXT: sub.w r0, sp, r0
-; CHECK-NEXT: mov sp, r0
-; CHECK-NEXT: bl take_ptr
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: movs r1, #0
-; CHECK-NEXT: bl __aeabi_fcmpeq
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: bleq knr
-; CHECK-NEXT: mov sp, r11
-; CHECK-NEXT: pop.w {r11, lr}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12}
-; CHECK-NEXT: aut r12, lr, sp
-; CHECK-NEXT: bx lr
+; R7-LABEL: test3:
+; R7: .cfi_startproc
+; R7-NEXT: @ %bb.0: @ %entry
+; R7-NEXT: pac r12, lr, sp
+; R7-NEXT: .save {r4, r5, r6, r7, r8, ra_auth_code, lr}
+; R7-NEXT: push.w {r4, r5, r6, r7, r8, r12, lr}
+; R7-NEXT: .cfi_def_cfa_offset 28
+; R7-NEXT: .cfi_offset lr, -4
+; R7-NEXT: .cfi_offset ra_auth_code, -8
+; R7-NEXT: .cfi_offset r8, -12
+; R7-NEXT: .cfi_offset r7, -16
+; R7-NEXT: .cfi_offset r6, -20
+; R7-NEXT: .cfi_offset r5, -24
+; R7-NEXT: .cfi_offset r4, -28
+; R7-NEXT: .setfp r7, sp, #12
+; R7-NEXT: add r7, sp, #12
+; R7-NEXT: .cfi_def_cfa r7, 16
+; R7-NEXT: .pad #4
+; R7-NEXT: sub sp, #4
+; R7-NEXT: cmp r0, #0
+; R7-NEXT: mov r5, r2
+; R7-NEXT: mov r4, r1
+; R7-NEXT: it ne
+; R7-NEXT: blne knr
+; R7-NEXT: adds r0, r5, #7
+; R7-NEXT: bic r0, r0, #7
+; R7-NEXT: sub.w r0, sp, r0
+; R7-NEXT: mov sp, r0
+; R7-NEXT: bl take_ptr
+; R7-NEXT: mov r0, r4
+; R7-NEXT: movs r1, #0
+; R7-NEXT: bl __aeabi_fcmpeq
+; R7-NEXT: cmp r0, #0
+; R7-NEXT: it eq
+; R7-NEXT: bleq knr
+; R7-NEXT: sub.w r4, r7, #12
+; R7-NEXT: mov sp, r4
+; R7-NEXT: pop.w {r4, r5, r6, r7, r8, r12, lr}
+; R7-NEXT: aut r12, lr, sp
+; R7-NEXT: bx lr
+;
+; R7-ABI-LABEL: test3:
+; R7-ABI: .cfi_startproc
+; R7-ABI-NEXT: @ %bb.0: @ %entry
+; R7-ABI-NEXT: pac r12, lr, sp
+; R7-ABI-NEXT: .save {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT: push {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT: .cfi_def_cfa_offset 20
+; R7-ABI-NEXT: .cfi_offset lr, -4
+; R7-ABI-NEXT: .cfi_offset r7, -8
+; R7-ABI-NEXT: .cfi_offset r6, -12
+; R7-ABI-NEXT: .cfi_offset r5, -16
+; R7-ABI-NEXT: .cfi_offset r4, -20
+; R7-ABI-NEXT: .setfp r7, sp, #12
+; R7-ABI-NEXT: add r7, sp, #12
+; R7-ABI-NEXT: .cfi_def_cfa r7, 8
+; R7-ABI-NEXT: .save {r8, ra_auth_code}
+; R7-ABI-NEXT: push.w {r8, r12}
+; R7-ABI-NEXT: .cfi_offset ra_auth_code, -24
+; R7-ABI-NEXT: .cfi_offset r8, -28
+; R7-ABI-NEXT: .pad #4
+; R7-ABI-NEXT: sub sp, #4
+; R7-ABI-NEXT: cmp r0, #0
+; R7-ABI-NEXT: mov r5, r2
+; R7-ABI-NEXT: mov r4, r1
+; R7-ABI-NEXT: it ne
+; R7-ABI-NEXT: blne knr
+; R7-ABI-NEXT: adds r0, r5, #7
+; R7-ABI-NEXT: bic r0, r0, #7
+; R7-ABI-NEXT: sub.w r0, sp, r0
+; R7-ABI-NEXT: mov sp, r0
+; R7-ABI-NEXT: bl take_ptr
+; R7-ABI-NEXT: mov r0, r4
+; R7-ABI-NEXT: movs r1, #0
+; R7-ABI-NEXT: bl __aeabi_fcmpeq
+; R7-ABI-NEXT: cmp r0, #0
+; R7-ABI-NEXT: it eq
+; R7-ABI-NEXT: bleq knr
+; R7-ABI-NEXT: sub.w r4, r7, #20
+; R7-ABI-NEXT: mov sp, r4
+; R7-ABI-NEXT: pop.w {r8, r12}
+; R7-ABI-NEXT: pop.w {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT: aut r12, lr, sp
+; R7-ABI-NEXT: bx lr
+;
+; R11-LABEL: test3:
+; R11: .cfi_startproc
+; R11-NEXT: @ %bb.0: @ %entry
+; R11-NEXT: pac r12, lr, sp
+; R11-NEXT: .save {r4, r5, r6, r7, r11, ra_auth_code, lr}
+; R11-NEXT: push.w {r4, r5, r6, r7, r11, r12, lr}
+; R11-NEXT: .cfi_def_cfa_offset 28
+; R11-NEXT: .cfi_offset lr, -4
+; R11-NEXT: .cfi_offset ra_auth_code, -8
+; R11-NEXT: .cfi_offset r11, -12
+; R11-NEXT: .cfi_offset r7, -16
+; R11-NEXT: .cfi_offset r6, -20
+; R11-NEXT: .cfi_offset r5, -24
+; R11-NEXT: .cfi_offset r4, -28
+; R11-NEXT: .setfp r11, sp, #16
+; R11-NEXT: add.w r11, sp, #16
+; R11-NEXT: .cfi_def_cfa r11, 12
+; R11-NEXT: .pad #4
+; R11-NEXT: sub sp, #4
+; R11-NEXT: cmp r0, #0
+; R11-NEXT: mov r5, r2
+; R11-NEXT: mov r4, r1
+; R11-NEXT: it ne
+; R11-NEXT: blne knr
+; R11-NEXT: adds r0, r5, #7
+; R11-NEXT: bic r0, r0, #7
+; R11-NEXT: sub.w r0, sp, r0
+; R11-NEXT: mov sp, r0
+; R11-NEXT: bl take_ptr
+; R11-NEXT: mov r0, r4
+; R11-NEXT: movs r1, #0
+; R11-NEXT: bl __aeabi_fcmpeq
+; R11-NEXT: cmp r0, #0
+; R11-NEXT: it eq
+; R11-NEXT: bleq knr
+; R11-NEXT: sub.w r4, r11, #16
+; R11-NEXT: mov sp, r4
+; R11-NEXT: pop.w {r4, r5, r6, r7, r11, r12, lr}
+; R11-NEXT: aut r12, lr, sp
+; R11-NEXT: bx lr
+;
+; R11-ABI-LABEL: test3:
+; R11-ABI: .cfi_startproc
+; R11-ABI-NEXT: @ %bb.0: @ %entry
+; R11-ABI-NEXT: pac r12, lr, sp
+; R11-ABI-NEXT: .save {r4, r5, r6, r7, ra_auth_code}
+; R11-ABI-NEXT: push.w {r4, r5, r6, r7, r12}
+; R11-ABI-NEXT: .cfi_def_cfa_offset 20
+; R11-ABI-NEXT: .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT: .cfi_offset r7, -8
+; R11-ABI-NEXT: .cfi_offset r6, -12
+; R11-ABI-NEXT: .cfi_offset r5, -16
+; R11-ABI-NEXT: .cfi_offset r4, -20
+; R11-ABI-NEXT: .save {r11, lr}
+; R11-ABI-NEXT: push.w {r11, lr}
+; R11-ABI-NEXT: .cfi_def_cfa_offset 28
+; R11-ABI-NEXT: .cfi_offset lr, -24
+; R11-ABI-NEXT: .cfi_offset r11, -28
+; R11-ABI-NEXT: .setfp r11, sp
+; R11-ABI-NEXT: mov r11, sp
+; R11-ABI-NEXT: .cfi_def_cfa_register r11
+; R11-ABI-NEXT: .pad #4
+; R11-ABI-NEXT: sub sp, #4
+; R11-ABI-NEXT: cmp r0, #0
+; R11-ABI-NEXT: mov r5, r2
+; R11-ABI-NEXT: mov r4, r1
+; R11-ABI-NEXT: it ne
+; R11-ABI-NEXT: blne knr
+; R11-ABI-NEXT: adds r0, r5, #7
+; R11-ABI-NEXT: bic r0, r0, #7
+; R11-ABI-NEXT: sub.w r0, sp, r0
+; R11-ABI-NEXT: mov sp, r0
+; R11-ABI-NEXT: bl take_ptr
+; R11-ABI-NEXT: mov r0, r4
+; R11-ABI-NEXT: movs r1, #0
+; R11-ABI-NEXT: bl __aeabi_fcmpeq
+; R11-ABI-NEXT: cmp r0, #0
+; R11-ABI-NEXT: it eq
+; R11-ABI-NEXT: bleq knr
+; R11-ABI-NEXT: mov sp, r11
+; R11-ABI-NEXT: pop.w {r11, lr}
+; R11-ABI-NEXT: pop.w {r4, r5, r6, r7, r12}
+; R11-ABI-NEXT: aut r12, lr, sp
+; R11-ABI-NEXT: bx lr
entry:
%tobool.not = icmp eq i32 %c, 0
br i1 %tobool.not, label %if.end, label %if.then
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
index 9a8bba47f33ad6..615af15e8b5679 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
@@ -9,10 +9,8 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
; CHECK-LABEL: f:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r12, lr}
; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: mov r6, r1
@@ -24,8 +22,7 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
; CHECK-NEXT: ldr r4, [r1]
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: blx r4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
index ad94b7be8b2a60..d02d4b51d73b53 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
@@ -38,20 +38,15 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr}
; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r5, -16
+; CHECK-NEXT: .cfi_offset r4, -20
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: bmi .LBB1_2
; CHECK-NEXT: @ %bb.1: @ %if.end
@@ -61,9 +56,7 @@ define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB1_3: @ %return
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -92,20 +85,15 @@ define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr}
; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r5, -16
+; CHECK-NEXT: .cfi_offset r4, -20
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: bmi .LBB2_2
; CHECK-NEXT: @ %bb.1: @ %if.end
@@ -115,9 +103,7 @@ define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB2_3: @ %return
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -167,18 +153,20 @@ attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable "
; UNWIND-LABEL: FunctionAddress: 0x4
; UNWIND: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: FunctionAddress: 0x30
+; UNWIND-LABEL: FunctionAddress: 0x26
; UNWIND: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: FunctionAddress: 0x5C
+; UNWIND-LABEL: FunctionAddress: 0x48
; UNWIND: 0xB4 ; pop ra_auth_code
-; UNWIND: 0x84 0x00 ; pop {lr}
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0
+; UNWIND-LABEL: 00000049 {{.*}} OUTLINED_FUNCTION_0
; UNWIND-LABEL: 00000005 {{.*}} f
-; UNWIND-LABEL: 00000031 {{.*}} g
+; UNWIND-LABEL: 00000027 {{.*}} g
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
index c0b45c0f90eb25..8777d517c4badc 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
@@ -33,25 +33,18 @@ define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: push.w {r6, r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: cmp.w r0, #-1
; CHECK-NEXT: ble .LBB0_2
; CHECK-NEXT: @ %bb.1: @ %if.end
; CHECK-NEXT: add r0, r1
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r3, r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_2: @ %if.then
@@ -90,20 +83,15 @@ define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr}
; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r5, -16
+; CHECK-NEXT: .cfi_offset r4, -20
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: bmi .LBB1_2
; CHECK-NEXT: @ %bb.1: @ %if.end
@@ -117,9 +105,7 @@ define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB1_3: @ %return
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -145,20 +131,15 @@ define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, lr}
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r6, -8
-; CHECK-NEXT: .cfi_offset r5, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr}
; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset lr, -4
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r6, -12
+; CHECK-NEXT: .cfi_offset r5, -16
+; CHECK-NEXT: .cfi_offset r4, -20
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: bmi .LBB2_2
; CHECK-NEXT: @ %bb.1: @ %if.end
@@ -172,9 +153,7 @@ define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: .LBB2_3: @ %return
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r6, lr}
+; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -213,32 +192,31 @@ attributes #2 = { noreturn "sign-return-address"="non-leaf" }
; UNWIND-LABEL: FunctionAddress: 0x0
-; UNWIND: Opcodes
+; UNWIND: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT: 0xB0 ; finish
-; UNWIND-NEXT: 0xB0 ; finish
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: FunctionAddress: 0x3C
-; UNWIND: Opcodes
+; UNWIND-LABEL: FunctionAddress: 0x30
+; UNWIND: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: FunctionAddress: 0x72
-; UNWIND: Opcodes
+; UNWIND-LABEL: FunctionAddress: 0x5C
+; UNWIND: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
+; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
-; UNWIND-LABEL: FunctionAddress: 0xA8
-; UNWIND: Opcodes
-; UNWIND-NEXT: 0xB0 ; finish
-; UNWIND-NEXT: 0xB0 ; finish
+; UNWIND-LABEL: FunctionAddress: 0x88
+; UNWIND: Opcodes [
; UNWIND-NEXT: 0xB0 ; finish
-; UNWIND: 000000a9 {{.*}} OUTLINED_FUNCTION_0
+; UNWIND: 00000089 {{.*}} OUTLINED_FUNCTION_0
; UWNIND: 00000001 {{.*}} _Z1hii
-; UWNIND: 0000003d {{.*}} _Z1fiiii
-; UWNIND: 00000073 {{.*}} _Z1giiii
+; UWNIND: 00000031 {{.*}} _Z1fiiii
+; UWNIND: 0000005d {{.*}} _Z1giiii
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
index 012120d976810b..5354303a034d4e 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
@@ -21,19 +21,17 @@ define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .save {r4, r6, r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r6, r7, r12, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 20
; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r4, -16
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r7, -12
+; CHECK-NEXT: .cfi_offset r6, -16
+; CHECK-NEXT: .cfi_offset r4, -20
; CHECK-NEXT: .setfp r7, sp, #8
; CHECK-NEXT: add r7, sp, #8
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .cfi_def_cfa r7, 12
; CHECK-NEXT: .pad #44
; CHECK-NEXT: sub sp, #44
; CHECK-NEXT: mov r4, sp
@@ -43,13 +41,12 @@ define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
; CHECK-NEXT: movs r0, #4
; CHECK-NEXT: bl _Z1giPi
; CHECK-NEXT: ldm.w sp, {r0, r1, r2, r3}
-; CHECK-NEXT: sub.w r4, r7, #12
+; CHECK-NEXT: sub.w r4, r7, #8
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r6, r7, lr}
+; CHECK-NEXT: pop.w {r4, r6, r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
@@ -78,6 +75,7 @@ declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr
; UNWIND-LABEL: FunctionAddress: 0x0
; UNWIND: 0x97 ; vsp = r7
-; UNWIND: 0x42 ; vsp = vsp - 12
-; UNWIND: 0xB4 ; pop ra_auth_code
-; UNWIND: 0x84 0x0D ; pop {r4, r6, r7, lr}
+; UNWIND-NEXT: 0x41 ; vsp = vsp - 8
+; UNWIND-NEXT: 0x80 0x0D ; pop {r4, r6, r7}
+; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
index cae38b5e4a5a1b..c0c32de509b75d 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
@@ -19,17 +19,14 @@ define i32 @test_non_leaf(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %x) "s
; CHECK-LABEL: test_non_leaf:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: bl otherfn
; CHECK-NEXT: ldr r0, [sp, #16]
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
index 63adc78fe849c3..2b7abfabf7035a 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
@@ -14,15 +14,12 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
+; CHECK-NEXT: .cfi_offset lr, -16
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .cfi_offset r7, -24
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 28
@@ -49,8 +46,7 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB0_4: @ %for.cond.cleanup
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: add sp, #12
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
index 38b5b7a16e01bd..03b769f256bc28 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
@@ -28,17 +28,14 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 28
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .cfi_offset r5, -24
-; CHECK-NEXT: .cfi_offset r4, -28
-; CHECK-NEXT: .save {ra_auth_code}
-; CHECK-NEXT: str r12, [sp, #-4]!
+; CHECK-NEXT: .save {r4, r5, r7, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r5, r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset ra_auth_code, -32
+; CHECK-NEXT: .cfi_offset lr, -16
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20
+; CHECK-NEXT: .cfi_offset r7, -24
+; CHECK-NEXT: .cfi_offset r5, -28
+; CHECK-NEXT: .cfi_offset r4, -32
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: .cfi_def_cfa_offset 40
@@ -63,8 +60,7 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: ldr r12, [sp], #4
-; CHECK-NEXT: pop.w {r4, r5, r7, lr}
+; CHECK-NEXT: pop.w {r4, r5, r7, r12, lr}
; CHECK-NEXT: add sp, #12
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
@@ -111,7 +107,9 @@ attributes #1 = { nounwind "sign-return-address"="non-leaf"}
!2 = !{i32 8, !"sign-return-address-all", i32 0}
; UNWIND-LABEL: FunctionAddress
-; UNWIND: 0x01 ; vsp = vsp + 8
-; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
-; UNWIND-NEXT: 0x84 0x0B ; pop {r4, r5, r7, lr}
-; UNWIND-NEXT: 0x02 ; vsp = vsp + 12
+; UNWIND: 0x01 ; vsp = vsp + 8
+; UNWIND-NEXT: 0x80 0x0B ; pop {r4, r5, r7}
+; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
+; UNWIND-NEXT: 0x02 ; vsp = vsp + 12
+
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index ccab35b7331141..5eb5990be7c118 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -20,22 +20,20 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r12, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r5, -16
-; CHECK-NEXT: .cfi_offset r4, -20
+; CHECK-NEXT: .cfi_offset ra_auth_code, -8
+; CHECK-NEXT: .cfi_offset r9, -12
+; CHECK-NEXT: .cfi_offset r8, -16
+; CHECK-NEXT: .cfi_offset r7, -20
+; CHECK-NEXT: .cfi_offset r6, -24
+; CHECK-NEXT: .cfi_offset r5, -28
+; CHECK-NEXT: .cfi_offset r4, -32
; CHECK-NEXT: .setfp r7, sp, #12
; CHECK-NEXT: add r7, sp, #12
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save {r8, r9, ra_auth_code}
-; CHECK-NEXT: push.w {r8, r9, r12}
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .cfi_offset r9, -28
-; CHECK-NEXT: .cfi_offset r8, -32
+; CHECK-NEXT: .cfi_def_cfa r7, 20
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: movs r0, #7
; CHECK-NEXT: add.w r0, r0, r5, lsl #2
@@ -95,10 +93,9 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: ldrne r1, [r2, #8]
; CHECK-NEXT: addne r0, r1
; CHECK-NEXT: .LBB0_9: @ %for.cond.cleanup
-; CHECK-NEXT: sub.w r4, r7, #24
+; CHECK-NEXT: sub.w r4, r7, #12
; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop.w {r8, r9, r12}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
More information about the llvm-commits mailing list