[llvm] f406210 - [AArch64] Use Windows-style prologue/epilogue regardless of CFI. (#156467)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 05:15:11 PST 2025
Author: Sander de Smalen
Date: 2025-12-11T13:15:07Z
New Revision: f4062100a0c762b7cc656df6c91202e8a1829329
URL: https://github.com/llvm/llvm-project/commit/f4062100a0c762b7cc656df6c91202e8a1829329
DIFF: https://github.com/llvm/llvm-project/commit/f4062100a0c762b7cc656df6c91202e8a1829329.diff
LOG: [AArch64] Use Windows-style prologue/epilogue regardless of CFI. (#156467)
To reduce the number of combinations to support, always use the same
prologue/epilogue lowering on Windows, regardless of whether unwind info
is required.
This also fixes an issue where a function with SVE callee-saves and
`nounwind` failed to compile, because the SVE lowering makes assumptions
that only hold when using the Windows-style prologue/epilogue.
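For reference, a minimal reproducer for the nounwind failure could look
like the sketch below (hypothetical, modeled on the test_nounwind_layout
case added to framelayout-sve-win.mir; the function name and clobber list
are illustrative, not taken from an original bug report):
  ; Hypothetical reproducer: an SVE-PCS function marked nounwind that
  ; clobbers SVE callee-saved registers on a Windows target, e.g.
  ;   llc -mtriple=aarch64-windows -mattr=+sve repro.ll
  define aarch64_sve_vector_pcs void @sve_csr_nounwind() nounwind {
    ; Clobbering z8/p8 forces SVE callee-save spills in the prologue;
    ; with nounwind, no Windows unwind info was requested, so the SVE
    ; lowering's Windows-layout assumptions no longer held.
    call void asm sideeffect "", "~{z8},~{p8}"()
    ret void
  }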
---------
Co-authored-by: Benjamin Maxwell <benjamin.maxwell at arm.com>
Added:
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
llvm/test/CodeGen/AArch64/llvm.frexp.ll
llvm/test/CodeGen/AArch64/preserve_mostcc.ll
llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
llvm/test/CodeGen/AArch64/swift-async-win.ll
llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c2f5c0368a782..799e116f05c27 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -383,6 +383,16 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
return false;
}
+static bool isTargetWindows(const MachineFunction &MF) {
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+}
+
+bool AArch64FrameLowering::hasSVECalleeSavesAboveFrameRecord(
+ const MachineFunction &MF) const {
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+}
+
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
/// When Exit block is given, this check is for epilog.
@@ -396,7 +406,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
return false;
// TODO: Windows is not supported yet.
- if (needsWinCFI(MF))
+ if (isTargetWindows(MF))
return false;
// TODO: SVE is not supported yet.
@@ -1168,10 +1178,6 @@ bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
return true;
}
-static bool isTargetWindows(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
-}
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1270,8 +1276,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool FPAfterSVECalleeSaves =
- isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
if (MFI.hasScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
@@ -1441,8 +1446,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
- bool FPAfterSVECalleeSaves =
- isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
if (isSVE) {
StackOffset FPOffset = StackOffset::get(
@@ -1673,7 +1677,6 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
return;
bool IsWindows = isTargetWindows(MF);
- bool NeedsWinCFI = AFL.needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned StackHazardSize = getStackHazardSize(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1691,7 +1694,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
int StackFillDir = -1;
int RegInc = 1;
unsigned FirstReg = 0;
- if (NeedsWinCFI) {
+ if (IsWindows) {
// For WinCFI, fill the stack from the bottom up.
ByteOffset = 0;
StackFillDir = 1;
@@ -1701,7 +1704,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
FirstReg = Count - 1;
}
- bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = AFL.hasSVECalleeSavesAboveFrameRecord(MF);
// Windows AAPCS has x9-x15 as volatile registers, x16-x17 as intra-procedural
// scratch, x18 as platform reserved. However, clang has extended calling
conventions such as preserve_most and preserve_all which treat these as
@@ -1771,6 +1774,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
ByteOffset += StackFillDir * StackHazardSize;
LastReg = RPI.Reg1;
+ bool NeedsWinCFI = AFL.needsWinCFI(MF);
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
@@ -1787,9 +1791,9 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
- !invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
- RPI.Reg1, NextReg, NeedsWinCFI,
- IsFirst, TRI))
+ !invalidateRegisterPairing(
+ SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
+ NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
@@ -1843,7 +1847,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
"Callee-save registers not saved as adjacent register pair!");
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (NeedsWinCFI &&
+ if (IsWindows &&
RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
@@ -1870,7 +1874,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
- if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
+ if (NeedGapToAlignStack && !IsWindows && !RPI.isScalable() &&
RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
ByteOffset % 16 != 0) {
ByteOffset += 8 * StackFillDir;
@@ -1886,7 +1890,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
assert(OffsetPost % Scale == 0);
// If filling top down (default), we want the offset after incrementing it.
// If filling bottom up (WinCFI) we need the original offset.
- int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
+ int Offset = IsWindows ? OffsetPre : OffsetPost;
// The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
// Swift context can directly precede FP.
@@ -1925,7 +1929,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
if (RPI.isPaired())
i += RegInc;
}
- if (NeedsWinCFI) {
+ if (IsWindows) {
// If we need an alignment gap in the stack, align the topmost stack
// object. A stack frame with a gap looks like this, bottom up:
// x19, d8. d9, gap.
@@ -2063,14 +2067,15 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
dbgs() << ")\n";
});
- assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+ assert((!isTargetWindows(MF) ||
+ !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
"Windows unwdinding requires a consecutive (FP,LR) pair");
// Windows unwind codes require consecutive registers if registers are
// paired. Make the switch here, so that the code below will save (x,x+1)
// and not (x+1,x).
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
- if (NeedsWinCFI && RPI.isPaired()) {
+ if (isTargetWindows(MF) && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
@@ -2239,7 +2244,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// and not (x+1,x).
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
- if (NeedsWinCFI && RPI.isPaired()) {
+ if (isTargetWindows(MF) && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
@@ -2756,14 +2761,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *RegInfo,
std::vector<CalleeSavedInfo> &CSI) const {
- bool NeedsWinCFI = needsWinCFI(MF);
+ bool IsWindows = isTargetWindows(MF);
unsigned StackHazardSize = getStackHazardSize(MF);
// To match the canonical windows frame layout, reverse the list of
// callee saved registers to get them laid out by PrologEpilogInserter
// in the right order. (PrologEpilogInserter allocates stack objects top
// down. Windows canonical prologs store higher numbered registers at
// the top, thus have the CSI array start from the highest registers.)
- if (NeedsWinCFI)
+ if (IsWindows)
std::reverse(CSI.begin(), CSI.end());
if (CSI.empty())
@@ -2774,8 +2779,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool UsesWinAAPCS = isTargetWindows(MF);
- if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+ if (IsWindows && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
@@ -2818,7 +2822,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
// Grab 8 bytes below FP for the extended asynchronous frame info.
- if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
+ if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !IsWindows &&
Reg == AArch64::FP) {
FrameIdx = MFI.CreateStackObject(8, Alignment, true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 97db18dd30bef..b2002dfd7039f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -168,6 +168,20 @@ class AArch64FrameLowering : public TargetFrameLowering {
friend class AArch64PrologueEmitter;
friend class AArch64EpilogueEmitter;
+ // Windows unwind can't represent the required stack adjustments if we have
+ // both SVE callee-saves and dynamic stack allocations, and the frame
+ // pointer is before the SVE spills. The allocation of the frame pointer
+ // must be the last instruction in the prologue so the unwinder can restore
+ // the stack pointer correctly. (And there isn't any unwind opcode for
+ // `addvl sp, x29, -17`.)
+ //
+ // Because of this, we do spills in the opposite order on Windows: first SVE,
+ // then GPRs. The main side-effect of this is that it makes accessing
+ // parameters passed on the stack more expensive.
+ //
+ // We could consider rearranging the spills for simpler cases.
+ bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) const;
+
protected:
bool hasFPImpl(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 965585f40571b..bb4f7e241fbca 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -96,19 +96,7 @@ AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
HasFP = AFL.hasFP(MF);
NeedsWinCFI = AFL.needsWinCFI(MF);
- // Windows unwind can't represent the required stack adjustments if we have
- // both SVE callee-saves and dynamic stack allocations, and the frame pointer
- // is before the SVE spills. The allocation of the frame pointer must be the
- // last instruction in the prologue so the unwinder can restore the stack
- // pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
- // -17`.)
- //
- // Because of this, we do spills in the opposite order on Windows: first SVE,
- // then GPRs. The main side-effect of this is that it makes accessing
- // parameters passed on the stack more expensive.
- //
- // We could consider rearranging the spills for simpler cases.
- if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
+ if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
if (AFI->hasStackHazardSlotIndex())
reportFatalUsageError("SME hazard padding is not supported on Windows");
SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
index b07f95e7fe474..333f47fb17516 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
@@ -9,20 +9,20 @@
define i32 @no_int_regs(i32 %x) nounwind {
; CHECK-LABEL: no_int_regs:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x29, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: str x27, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: str w0, [sp, #28] // 4-byte Spill
+; CHECK-NEXT: stp x19, x20, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x25, x26, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: str x27, [sp, #48] // 8-byte Spill
+; CHECK-NEXT: stp x29, x30, [sp, #56] // 16-byte Folded Spill
+; CHECK-NEXT: str w0, [sp, #76] // 4-byte Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldr w0, [sp, #28] // 4-byte Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #56] // 16-byte Folded Reload
+; CHECK-NEXT: ldr w0, [sp, #76] // 4-byte Reload
+; CHECK-NEXT: ldp x25, x26, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x27, [sp, #48] // 8-byte Reload
+; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x19, x20, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x25},~{x26},~{x27},~{fp},~{lr}"()
@@ -32,20 +32,20 @@ entry:
define i32 @one_int_reg(i32 %x) nounwind {
; CHECK-LABEL: one_int_reg:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x29, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: str x27, [sp, #16] // 8-byte Spill
+; CHECK-NEXT: stp x19, x20, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x25, x26, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: str x27, [sp, #48] // 8-byte Spill
+; CHECK-NEXT: stp x29, x30, [sp, #56] // 16-byte Folded Spill
; CHECK-NEXT: mov w30, w0
-; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w0, w30
-; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #56] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x25, x26, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x27, [sp, #48] // 8-byte Reload
+; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x19, x20, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x25},~{x26},~{x27},~{fp}"()
@@ -56,18 +56,18 @@ define float @no_float_regs(float %x) nounwind {
; CHECK-LABEL: no_float_regs:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp d8, d9, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d10, d11, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d12, d13, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d14, d15, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d14, d15, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d12, d13, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d10, d11, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d8, d9, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
entry:
@@ -78,21 +78,20 @@ entry:
define float @one_float_reg(float %x) nounwind {
; CHECK-LABEL: one_float_reg:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d8, d9, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d14, d15, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: fmov s15, s0
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d10, d11, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d12, d13, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d12, d13, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, s15
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d14, d15, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d10, d11, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d8, d9, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14}"()
ret float %x
}
-
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
index b8302e64f282d..0d84fa14d8672 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -17,6 +17,7 @@
define aarch64_sve_vector_pcs void @save_restore_sve() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_sve_realign() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @frame_layout() uwtable { entry: unreachable }
+ define aarch64_sve_vector_pcs void @test_nounwind_layout() nounwind { entry: unreachable }
...
---
name: test_allocate_sve
@@ -882,3 +883,29 @@ body: |
RET_ReallyLR
...
+---
+name: test_nounwind_layout
+stack:
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_nounwind_layout
+ ; CHECK: fixedStack:
+ ; CHECK: liveins: $p8, $z8, $lr, $x20
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+ ; CHECK-NEXT: frame-setup STR_PXI killed $p8, $sp, 0 :: (store (s16) into %stack.3)
+ ; CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 1 :: (store (s128) into %stack.2)
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $x20, killed $lr, $sp, -2 :: (store (s64) into %stack.0), (store (s64) into %stack.1)
+ ; CHECK-NEXT: $x20 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z8 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber $sp, $x20, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.0), (load (s64) from %stack.1)
+ ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.2)
+ ; CHECK-NEXT: $p8 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.3)
+ ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+ ; CHECK-NEXT: RET_ReallyLR
+ $x20 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $z8 = IMPLICIT_DEF
+ RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
index bc9f4c54174d4..7fd4246cd4975 100644
--- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -133,7 +133,7 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind
; WINDOWS-NEXT: mov h1, v0.h[1]
; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill
; WINDOWS-NEXT: add x0, sp, #36
-; WINDOWS-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x30, [sp, #48] // 16-byte Folded Spill
; WINDOWS-NEXT: add x19, sp, #36
; WINDOWS-NEXT: fcvt d0, h1
; WINDOWS-NEXT: bl frexp
@@ -166,7 +166,7 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind
; WINDOWS-NEXT: ldr s1, [sp, #32]
; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload
; WINDOWS-NEXT: ld1 { v1.s }[1], [x19]
-; WINDOWS-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x30, [sp, #48] // 16-byte Folded Reload
; WINDOWS-NEXT: // kill: def $d1 killed $d1 killed $q1
; WINDOWS-NEXT: mov v0.h[3], v2.h[0]
; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -297,7 +297,7 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill
; WINDOWS-NEXT: add x0, sp, #24
; WINDOWS-NEXT: fcvt d0, h0
-; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x30, [sp, #32] // 16-byte Folded Spill
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload
; WINDOWS-NEXT: add x0, sp, #28
@@ -307,7 +307,7 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: ldr s0, [sp, #24]
; WINDOWS-NEXT: ld1 { v0.s }[1], [x19]
-; WINDOWS-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x30, [sp, #32] // 16-byte Folded Reload
; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0
; WINDOWS-NEXT: add sp, sp, #48
; WINDOWS-NEXT: ret
@@ -360,9 +360,9 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi
; WINDOWS-NEXT: mov s1, v0.s[1]
; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill
; WINDOWS-NEXT: add x0, sp, #36
-; WINDOWS-NEXT: stp x30, x21, [sp, #48] // 16-byte Folded Spill
-; WINDOWS-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x20, [sp, #48] // 16-byte Folded Spill
; WINDOWS-NEXT: add x19, sp, #36
+; WINDOWS-NEXT: stp x21, x30, [sp, #64] // 16-byte Folded Spill
; WINDOWS-NEXT: fcvt d0, s1
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: fcvt s0, d0
@@ -397,9 +397,9 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi
; WINDOWS-NEXT: ld1 { v1.s }[1], [x19]
; WINDOWS-NEXT: mov v0.s[3], v2.s[0]
; WINDOWS-NEXT: ld1 { v1.s }[2], [x20]
-; WINDOWS-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x20, [sp, #48] // 16-byte Folded Reload
; WINDOWS-NEXT: ld1 { v1.s }[3], [x21]
-; WINDOWS-NEXT: ldp x30, x21, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x21, x30, [sp, #64] // 16-byte Folded Reload
; WINDOWS-NEXT: add sp, sp, #80
; WINDOWS-NEXT: ret
%result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3f32.v3i32(<3 x float> %a)
@@ -536,7 +536,7 @@ define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwi
; WINDOWS-NEXT: mov s1, v0.s[1]
; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill
; WINDOWS-NEXT: add x0, sp, #28
-; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x30, [sp, #32] // 16-byte Folded Spill
; WINDOWS-NEXT: add x19, sp, #28
; WINDOWS-NEXT: fcvt d0, s1
; WINDOWS-NEXT: bl frexp
@@ -551,7 +551,7 @@ define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwi
; WINDOWS-NEXT: ldr s1, [sp, #24]
; WINDOWS-NEXT: ldr q2, [sp] // 16-byte Reload
; WINDOWS-NEXT: ld1 { v1.s }[1], [x19]
-; WINDOWS-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x30, [sp, #32] // 16-byte Folded Reload
; WINDOWS-NEXT: // kill: def $d1 killed $d1 killed $q1
; WINDOWS-NEXT: mov v0.s[1], v2.s[0]
; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -643,7 +643,7 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind {
; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill
; WINDOWS-NEXT: add x0, sp, #24
; WINDOWS-NEXT: fcvt d0, s0
-; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x30, [sp, #32] // 16-byte Folded Spill
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload
; WINDOWS-NEXT: add x0, sp, #28
@@ -653,7 +653,7 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind {
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: ldr s0, [sp, #24]
; WINDOWS-NEXT: ld1 { v0.s }[1], [x19]
-; WINDOWS-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x30, [sp, #32] // 16-byte Folded Reload
; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0
; WINDOWS-NEXT: add sp, sp, #48
; WINDOWS-NEXT: ret
@@ -717,9 +717,9 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; WINDOWS-NEXT: mov s1, v0.s[1]
; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill
; WINDOWS-NEXT: add x0, sp, #36
-; WINDOWS-NEXT: stp x30, x21, [sp, #48] // 16-byte Folded Spill
-; WINDOWS-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x20, [sp, #48] // 16-byte Folded Spill
; WINDOWS-NEXT: add x19, sp, #36
+; WINDOWS-NEXT: stp x21, x30, [sp, #64] // 16-byte Folded Spill
; WINDOWS-NEXT: fcvt d0, s1
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: fcvt s0, d0
@@ -754,9 +754,9 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; WINDOWS-NEXT: ld1 { v1.s }[1], [x19]
; WINDOWS-NEXT: mov v0.s[3], v2.s[0]
; WINDOWS-NEXT: ld1 { v1.s }[2], [x20]
-; WINDOWS-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x20, [sp, #48] // 16-byte Folded Reload
; WINDOWS-NEXT: ld1 { v1.s }[3], [x21]
-; WINDOWS-NEXT: ldp x30, x21, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x21, x30, [sp, #64] // 16-byte Folded Reload
; WINDOWS-NEXT: add sp, sp, #80
; WINDOWS-NEXT: ret
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
@@ -887,8 +887,8 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill
; WINDOWS-NEXT: fcvt d0, s0
; WINDOWS-NEXT: add x0, sp, #16
-; WINDOWS-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill
-; WINDOWS-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x20, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x21, x30, [sp, #48] // 16-byte Folded Spill
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload
; WINDOWS-NEXT: add x0, sp, #20
@@ -911,9 +911,9 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; WINDOWS-NEXT: ldr s0, [sp, #16]
; WINDOWS-NEXT: ld1 { v0.s }[1], [x19]
; WINDOWS-NEXT: ld1 { v0.s }[2], [x20]
-; WINDOWS-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x20, [sp, #32] // 16-byte Folded Reload
; WINDOWS-NEXT: ld1 { v0.s }[3], [x21]
-; WINDOWS-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x21, x30, [sp, #48] // 16-byte Folded Reload
; WINDOWS-NEXT: add sp, sp, #64
; WINDOWS-NEXT: ret
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
@@ -1019,7 +1019,7 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) noun
; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill
; WINDOWS-NEXT: mov d0, v0.d[1]
; WINDOWS-NEXT: add x0, sp, #40
-; WINDOWS-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x30, [sp, #48] // 16-byte Folded Spill
; WINDOWS-NEXT: add x19, sp, #40
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -1032,7 +1032,7 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) noun
; WINDOWS-NEXT: ldr q2, [sp, #16] // 16-byte Reload
; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0
; WINDOWS-NEXT: ld1 { v1.s }[1], [x19]
-; WINDOWS-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x30, [sp, #48] // 16-byte Folded Reload
; WINDOWS-NEXT: mov v0.d[1], v2.d[0]
; WINDOWS-NEXT: // kill: def $d1 killed $d1 killed $q1
; WINDOWS-NEXT: add sp, sp, #64
@@ -1113,7 +1113,7 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind
; WINDOWS: // %bb.0:
; WINDOWS-NEXT: sub sp, sp, #48
; WINDOWS-NEXT: add x0, sp, #28
-; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT: stp x19, x30, [sp, #32] // 16-byte Folded Spill
; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill
; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0
; WINDOWS-NEXT: bl frexp
@@ -1124,7 +1124,7 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind
; WINDOWS-NEXT: bl frexp
; WINDOWS-NEXT: ldr s0, [sp, #28]
; WINDOWS-NEXT: ld1 { v0.s }[1], [x19]
-; WINDOWS-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT: ldp x19, x30, [sp, #32] // 16-byte Folded Reload
; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0
; WINDOWS-NEXT: add sp, sp, #48
; WINDOWS-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/preserve_mostcc.ll b/llvm/test/CodeGen/AArch64/preserve_mostcc.ll
index f77ada4eae022..1fb2e5fd02e3c 100644
--- a/llvm/test/CodeGen/AArch64/preserve_mostcc.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_mostcc.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=arm64-apple-ios-8.0.0 | FileCheck -check-prefix CHECK -check-prefix CHECK-DARWIN %s
; RUN: llc < %s -mtriple=aarch64-unknown-windows-msvc | FileCheck -check-prefix CHECK -check-prefix CHECK-WIN %s
@@ -7,28 +8,35 @@ declare preserve_mostcc void @preserve_mostcc_func()
; Registers r9-r15 should be saved before the call of a function
; with a standard calling convention.
define preserve_mostcc void @preserve_mostcc1() nounwind {
+; CHECK-DARWIN-LABEL: preserve_mostcc1:
+; CHECK-DARWIN: ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT: str x15, [sp, #-80]! ; 8-byte Folded Spill
+; CHECK-DARWIN-NEXT: stp x14, x13, [sp, #16] ; 16-byte Folded Spill
+; CHECK-DARWIN-NEXT: stp x12, x11, [sp, #32] ; 16-byte Folded Spill
+; CHECK-DARWIN-NEXT: stp x10, x9, [sp, #48] ; 16-byte Folded Spill
+; CHECK-DARWIN-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-DARWIN-NEXT: bl _standard_cc_func
+; CHECK-DARWIN-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-DARWIN-NEXT: ldp x10, x9, [sp, #48] ; 16-byte Folded Reload
+; CHECK-DARWIN-NEXT: ldp x12, x11, [sp, #32] ; 16-byte Folded Reload
+; CHECK-DARWIN-NEXT: ldp x14, x13, [sp, #16] ; 16-byte Folded Reload
+; CHECK-DARWIN-NEXT: ldr x15, [sp], #80 ; 8-byte Folded Reload
+; CHECK-DARWIN-NEXT: ret
+;
+; CHECK-WIN-LABEL: preserve_mostcc1:
+; CHECK-WIN: // %bb.0: // %entry
+; CHECK-WIN-NEXT: stp x30, x9, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-WIN-NEXT: stp x10, x11, [sp, #16] // 16-byte Folded Spill
+; CHECK-WIN-NEXT: stp x12, x13, [sp, #32] // 16-byte Folded Spill
+; CHECK-WIN-NEXT: stp x14, x15, [sp, #48] // 16-byte Folded Spill
+; CHECK-WIN-NEXT: bl standard_cc_func
+; CHECK-WIN-NEXT: ldp x14, x15, [sp, #48] // 16-byte Folded Reload
+; CHECK-WIN-NEXT: ldp x12, x13, [sp, #32] // 16-byte Folded Reload
+; CHECK-WIN-NEXT: ldp x10, x11, [sp, #16] // 16-byte Folded Reload
+; CHECK-WIN-NEXT: ldp x30, x9, [sp], #64 // 16-byte Folded Reload
+; CHECK-WIN-NEXT: ret
entry:
-;CHECK-LABEL: preserve_mostcc1
-;CHECK-DARWIN-NOT: stp
-;CHECK-DARWIN-NOT: str
-;CHECK-DARWIN: str x15
-;CHECK-DARWIN-NEXT: stp x14, x13,
-;CHECK-DARWIN-NEXT: stp x12, x11,
-;CHECK-DARWIN-NEXT: stp x10, x9,
-;CHECK-WIN: stp x15, x14
-;CHECK-WIN-NEXT: stp x13, x12,
-;CHECK-WIN-NEXT: stp x11, x10,
-;CHECK-WIN-NEXT: stp x9, x30
-;CHECK: bl {{_?}}standard_cc_func
call void @standard_cc_func()
-;CHECK-DARWIN: ldp x10, x9,
-;CHECK-DARWIN-NEXT: ldp x12, x11,
-;CHECK-DARWIN-NEXT: ldp x14, x13,
-;CHECK-DARWIN-NEXT: ldr x15
-;CHECK-WIN: ldp x9, x30
-;CHECK-WIN-NEXT: ldp x11, x10,
-;CHECK-WIN-NEXT: ldp x13, x12,
-;CHECK-WIN-NEXT: ldp x15, x14,
ret void
}
@@ -37,14 +45,23 @@ entry:
; function with preserve_mostcc calling convention, because the
; callee will save these registers anyway.
define preserve_mostcc void @preserve_mostcc2() nounwind {
+; CHECK-DARWIN-LABEL: preserve_mostcc2:
+; CHECK-DARWIN: ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-DARWIN-NEXT: bl _preserve_mostcc_func
+; CHECK-DARWIN-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-DARWIN-NEXT: ret
+;
+; CHECK-WIN-LABEL: preserve_mostcc2:
+; CHECK-WIN: // %bb.0: // %entry
+; CHECK-WIN-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-WIN-NEXT: bl preserve_mostcc_func
+; CHECK-WIN-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-WIN-NEXT: ret
entry:
-;CHECK-LABEL: preserve_mostcc2
-;CHECK-NOT: x14
-;CHECK-DARWIN: stp x29, x30,
-;CHECK-WIN: str x30
-;CHECK-NOT: x14
-;CHECK: bl {{_?}}preserve_mostcc_func
call preserve_mostcc void @preserve_mostcc_func()
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
index 784d5ed4b3c6f..4d5e7a32b9d5f 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -38,32 +38,31 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: mov w5, #6 // =0x6
; CHECK-NEXT: mov w6, #7 // =0x7
; CHECK-NEXT: mov w7, #8 // =0x8
-; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill
-; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: stp x19, x20, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x21, x22, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp x23, x24, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x25, x26, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x27, x28, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #96] // 8-byte Spill
+; CHECK-NEXT: stp d8, d9, [sp, #104] // 16-byte Folded Spill
+; CHECK-NEXT: stp d10, d11, [sp, #120] // 16-byte Folded Spill
+; CHECK-NEXT: stp d12, d13, [sp, #136] // 16-byte Folded Spill
+; CHECK-NEXT: stp d14, d15, [sp, #152] // 16-byte Folded Spill
; CHECK-NEXT: str w8, [sp, #8]
; CHECK-NEXT: str w9, [sp]
; CHECK-NEXT: bl callee
-; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d14, d15, [sp, #152] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Reload
+; CHECK-NEXT: ldp d12, d13, [sp, #136] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d10, d11, [sp, #120] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d8, d9, [sp, #104] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x27, x28, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x25, x26, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x23, x24, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x21, x22, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x19, x20, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
%r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
ret i32 %r
}
-
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
index f7f8b223f233b..648cba57b95cf 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
@@ -8,9 +8,9 @@ declare void @shared_za_callee() "aarch64_inout_za"
define void @test_lazy_save() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x29, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: str x19, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #8
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mul x9, x8, x8
@@ -18,21 +18,21 @@ define void @test_lazy_save() nounwind "aarch64_inout_za" {
; CHECK-NEXT: bl __chkstk
; CHECK-NEXT: sub x9, sp, x15, lsl #4
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x10, x29, #16
-; CHECK-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-NEXT: sub x10, x29, #24
+; CHECK-NEXT: stp x9, x8, [x29, #-24]
; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #24
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: ldp x30, x29, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #8
+; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
ret void
diff --git a/llvm/test/CodeGen/AArch64/swift-async-win.ll b/llvm/test/CodeGen/AArch64/swift-async-win.ll
index 69c83072fbc85..c74e9667f9c75 100644
--- a/llvm/test/CodeGen/AArch64/swift-async-win.ll
+++ b/llvm/test/CodeGen/AArch64/swift-async-win.ll
@@ -18,27 +18,27 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
define hidden swifttailcc void @"$ss23withCheckedContinuation8function_xSS_yScCyxs5NeverOGXEtYalFTQ0_"(ptr nocapture readonly %0) #1 {
; CHECK-LABEL: $ss23withCheckedContinuation8function_xSS_yScCyxs5NeverOGXEtYalFTQ0_:
; CHECK: // %bb.0: // %entryresume.0
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: stp x30, x29, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT: add x29, sp, #24
-; CHECK-NEXT: str x19, [sp, #40] // 8-byte Spill
-; CHECK-NEXT: adrp x19, __imp_swift_task_dealloc
-; CHECK-NEXT: str xzr, [sp, #16]
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: stur x8, [x29, #-8]
-; CHECK-NEXT: ldr x20, [x0]
-; CHECK-NEXT: ldp x22, x0, [x8, #16]
-; CHECK-NEXT: stur x20, [x29, #-8]
-; CHECK-NEXT: ldr x19, [x19, :lo12:__imp_swift_task_dealloc]
-; CHECK-NEXT: blr x19
-; CHECK-NEXT: mov x0, x22
-; CHECK-NEXT: blr x19
-; CHECK-NEXT: ldp x30, x29, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: mov x0, x20
-; CHECK-NEXT: ldr x1, [x20, #8]
-; CHECK-NEXT: ldr x19, [sp, #40] // 8-byte Reload
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: br x1
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT: adrp x19, __imp_swift_task_dealloc
+; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #32
+; CHECK-NEXT: str xzr, [sp, #24]
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: stur x8, [x29, #-8]
+; CHECK-NEXT: ldr x20, [x0]
+; CHECK-NEXT: ldp x22, x0, [x8, #16]
+; CHECK-NEXT: stur x20, [x29, #-8]
+; CHECK-NEXT: ldr x19, [x19, :lo12:__imp_swift_task_dealloc]
+; CHECK-NEXT: blr x19
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: blr x19
+; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: ldr x1, [x20, #8]
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: br x1
entryresume.0:
%1 = load ptr, ptr %0, align 8
%2 = tail call ptr @llvm.swift.async.context.addr() #4
@@ -70,4 +70,3 @@ attributes #1 = { nounwind "frame-pointer"="none" "no-trapping-math"="true" "sta
attributes #2 = { nounwind readnone }
attributes #3 = { argmemonly nounwind }
attributes #4 = { nounwind }
-
diff --git a/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll b/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
index e0ef14ba23921..4d3a9f34a1d16 100644
--- a/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
+++ b/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm64-windows -o - %s | FileCheck %s
declare void @f()
@@ -5,21 +6,21 @@ declare void @g()
; Function Attrs: nounwind
define dso_local void @SEHfilter() nounwind "frame-pointer"="all" {
-; CHECK-LABEL: @SEHfilter
-; CHECK: %bb.0:
-; CHECK-NEXT: stp x30, x29, [sp, #-32]!
-; CHECK-NEXT: str x19, [sp, #16]
-; CHECK-NEXT: ldr w19, [x8]
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: bl g
-; CHECK-NEXT: cbz w19, .LBB0_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: ldr x19, [sp, #16]
-; CHECK-NEXT: ldp x30, x29, [sp], #32
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_2: // %if.end.i
-; CHECK-NEXT: bl f
-; CHECK-NEXT: brk #0x1
+; CHECK-LABEL: SEHfilter:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x19, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: ldr w19, [x8]
+; CHECK-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #8
+; CHECK-NEXT: bl g
+; CHECK-NEXT: cbz w19, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %exit
+; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: // %if.end.i
+; CHECK-NEXT: bl f
+; CHECK-NEXT: brk #0x1
%1 = load i32, ptr undef, align 4
tail call void @g()
%tobool.i = icmp eq i32 %1, 0