[llvm] 5987da8 - Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""
Owen Anderson via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 1 12:29:14 PDT 2020
Author: Owen Anderson
Date: 2020-09-01T19:29:03Z
New Revision: 5987da8764b71cd59fec772323cc2003bd82da38
URL: https://github.com/llvm/llvm-project/commit/5987da8764b71cd59fec772323cc2003bd82da38
DIFF: https://github.com/llvm/llvm-project/commit/5987da8764b71cd59fec772323cc2003bd82da38.diff
LOG: Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""
This reverts commit bc9a29b9ee6ade4894252b1470977142c32b4602.
The reasoning that this patch was wrong was itself incorrect
(see discussion on llvm-commits). This patch does seem to be exposing
a latent SVE code generation bug on non-public tests, which should
not block a correctness fix for public, non-SVE use cases.
Added:
llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c6cc6e9e8471..751791bdb354 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1024,10 +1024,6 @@ static bool needsWinCFI(const MachineFunction &MF) {
F.needsUnwindTableEntry();
}
-static bool isTargetDarwin(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
-}
-
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
@@ -1185,7 +1181,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// For funclets the FP belongs to the containing function.
if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
- int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
+ int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -1409,11 +1405,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (needsFrameMoves) {
- const DataLayout &TD = MF.getDataLayout();
- const int StackGrowth = isTargetDarwin(MF)
- ? (2 * -TD.getPointerSize(0))
- : -AFI->getCalleeSavedStackSize();
- Register FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
@@ -1481,10 +1472,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset w28, -32
if (HasFP) {
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
+ Register FramePtr = RegInfo->getFrameRegister(MF);
+
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
+ MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1775,10 +1771,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
- int64_t OffsetToFrameRecord =
- isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- {OffsetToFrameRecord, MVT::i8},
+ {-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
@@ -1839,11 +1833,11 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
-
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
- unsigned FPAdjust = isTargetDarwin(MF)
- ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
+ int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
+ int64_t FPAdjust =
+ CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
}
@@ -2231,6 +2225,14 @@ static void computeCalleeSaveRegisterPairs(
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
+ // Save the offset to frame record so that the FP register can point to the
+ // innermost frame record (spilled FP and LR registers).
+ if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
+ RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg1 == AArch64::FP &&
+ RPI.Reg2 == AArch64::LR)))
+ AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
+
RegPairs.push_back(RPI);
if (RPI.isPaired())
++i;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 86aacedebdfe..9e37d0292e7a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3442,8 +3442,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
- assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
- "SP increment/decrement not 16-byte aligned");
+ assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
+ "SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 84aa53f2bece..9562269336d8 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -135,6 +135,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// e.g. Tail Call, Thunk, or Function if none apply.
Optional<std::string> OutliningStyle;
+ // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
+ // CalleeSavedStackSize) to the address of the frame record.
+ int CalleeSaveBaseToFrameRecordOffset = 0;
+
public:
AArch64FunctionInfo() = default;
@@ -338,6 +342,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
TaggedBasePointerOffset = Offset;
}
+ int getCalleeSaveBaseToFrameRecordOffset() const {
+ return CalleeSaveBaseToFrameRecordOffset;
+ }
+ void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
+ CalleeSaveBaseToFrameRecordOffset = Offset;
+ }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
new file mode 100644
index 000000000000..3b13dee29f06
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
@@ -0,0 +1,22 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
+
+; The purpose of this test is to verify that the frame pointer (x29)
+; is correctly set up in the presence of callee-saved floating
+; point registers. The frame pointer should point to the frame
+; record, which is located 16 bytes above the end of the CSR
+; space when a single FP CSR is in use.
+define void @test1(i32) #26 {
+entry:
+ call void asm sideeffect "nop", "~{d8}"() #26
+ ret void
+}
+; CHECK-LABEL: test1:
+; CHECK: str d8, [sp, #-32]!
+; CHECK-NEXT: stp x29, x30, [sp, #16]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK: nop
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK-NEXT: ldr d8, [sp], #32
+; CHECK-NEXT: ret
+
+attributes #26 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
new file mode 100644
index 000000000000..ab4af04401c5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
@@ -0,0 +1,29 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s
+
+---
+name: TestFrameRecordLocation
+tracksRegLiveness: true
+frameInfo:
+ isFrameAddressTaken: true
+body: |
+ bb.0:
+ $d8 = IMPLICIT_DEF
+ $d9 = IMPLICIT_DEF
+ $x19 = IMPLICIT_DEF
+ RET_ReallyLR
+
+# CHECK-LABEL: TestFrameRecordLocation
+
+# CHECK: stp d9, d8, [sp, #-48]!
+# CHECK: stp x29, x30, [sp, #16]
+# CHECK: str x19, [sp, #32]
+
+# CHECK: add x29, sp, #16
+
+# CHECK: .cfi_def_cfa w29, 32
+# CHECK: .cfi_offset w19, -16
+# CHECK: .cfi_offset w30, -24
+# CHECK: .cfi_offset w29, -32
+# CHECK: .cfi_offset b8, -40
+# CHECK: .cfi_offset b9, -48
+...
diff --git a/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
new file mode 100644
index 000000000000..160eb2dd099b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; The purpose of this test is to construct a scenario where an odd number
+; of callee-saved GPRs as well as an odd number of callee-saved FPRs are
+; used. This caused the frame pointer to be aligned to a multiple of 8
+; on non-Darwin platforms, rather than a multiple of 16 as usual.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = global i64 0, align 4
+
+
+define i64 @b() {
+entry:
+ %call = tail call i64 @d()
+ %0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16
+ %1 = ptrtoint i8* %0 to i64
+ store i64 %1, i64* @a, align 4
+ %call1 = call i64 @e()
+ %conv = sitofp i64 %call1 to float
+ %2 = load i64, i64* @a, align 4
+ %call2 = call i64 @f(i64 %2)
+ %conv3 = fptosi float %conv to i64
+ ret i64 %conv3
+}
+
+; CHECK-LABEL: b:
+; CHECK: str d8, [sp, #-32]!
+; CHECK-NEXT: stp x29, x30, [sp, #8]
+; CHECK-NEXT: str x19, [sp, #24]
+; CHECK-NEXT: add x29, sp, #8
+
+; CHECK: sub sp, x29, #8
+; CHECK-NEXT: ldr x19, [sp, #24]
+; CHECK-NEXT: ldp x29, x30, [sp, #8]
+; CHECK-NEXT: ldr d8, [sp], #32
+; CHECK-NEXT: ret
+
+declare i64 @d()
+declare i64 @e()
+declare i64 @f(i64)
More information about the llvm-commits
mailing list