[llvm] bc9a29b - Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain"

Tue Sep 1 08:10:45 PDT 2020

Author: Paul Walker
Date: 2020-09-01T16:09:37+01:00
New Revision: bc9a29b9ee6ade4894252b1470977142c32b4602

URL: https://github.com/llvm/llvm-project/commit/bc9a29b9ee6ade4894252b1470977142c32b4602
DIFF: https://github.com/llvm/llvm-project/commit/bc9a29b9ee6ade4894252b1470977142c32b4602.diff

LOG: Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain"

This reverts commit e9d9a612084b47fc4277523561d61e675370c854.

This patch was previously revert by 04879086b44348cad600a0a1ccbe1f7776cc3cf9
with the reapplication being done after breaking the assert used to
ensure SP is always 16-byte aligned, which is a requirement of the AAPCS.

For extra context the latest patch caused runtime failures when
building with "-march=armv8-a+sve -mllvm -aarch64-sve-vector-bits-min=256".

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Removed: 
    llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
    llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
    llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 751791bdb354..c6cc6e9e8471 100644

--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1024,6 +1024,10 @@ static bool needsWinCFI(const MachineFunction &MF) {
          F.needsUnwindTableEntry();
 }
 
+static bool isTargetDarwin(const MachineFunction &MF) {
+  return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
+}
+
 static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
@@ -1181,7 +1185,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // For funclets the FP belongs to the containing function.
   if (!IsFunclet && HasFP) {
     // Only set up FP if we actually need to.
-    int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
+    int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
 
     if (CombineSPBump)
       FPOffset += AFI->getLocalStackSize();
@@ -1405,6 +1409,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   if (needsFrameMoves) {
+    const DataLayout &TD = MF.getDataLayout();
+    const int StackGrowth = isTargetDarwin(MF)
+                                ? (2 * -TD.getPointerSize(0))
+                                : -AFI->getCalleeSavedStackSize();
+    Register FramePtr = RegInfo->getFrameRegister(MF);
     // An example of the prologue:
     //
     //     .globl __foo
@@ -1472,15 +1481,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     //     .cfi_offset w28, -32
 
     if (HasFP) {
-      const int OffsetToFirstCalleeSaveFromFP =
-          AFI->getCalleeSaveBaseToFrameRecordOffset() -
-          AFI->getCalleeSavedStackSize();
-      Register FramePtr = RegInfo->getFrameRegister(MF);
-
       // Define the current CFA rule to use the provided FP.
       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
       unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
+          MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
@@ -1771,8 +1775,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // non-post-indexed loads for the restores if we aren't actually going to
   // be able to save any instructions.
   if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
+    int64_t OffsetToFrameRecord =
+        isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-                    {-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
+                    {OffsetToFrameRecord, MVT::i8},
                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
   } else if (NumBytes)
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
@@ -1833,11 +1839,11 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   bool IsWin64 =
       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+
   unsigned FixedObject =
       getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
-  int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
-  int64_t FPAdjust =
-      CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
+  unsigned FPAdjust = isTargetDarwin(MF)
+                        ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
   return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
 }
 
@@ -2225,14 +2231,6 @@ static void computeCalleeSaveRegisterPairs(
             (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
            "Offset out of bounds for LDP/STP immediate");
 
-    // Save the offset to frame record so that the FP register can point to the
-    // innermost frame record (spilled FP and LR registers).
-    if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
-                              RPI.Reg2 == AArch64::FP) ||
-                             (IsWindows && RPI.Reg1 == AArch64::FP &&
-                              RPI.Reg2 == AArch64::LR)))
-      AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
-
     RegPairs.push_back(RPI);
     if (RPI.isPaired())
       ++i;

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 9e37d0292e7a..86aacedebdfe 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3442,8 +3442,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
 
   // First emit non-scalable frame offsets, or a simple 'mov'.
   if (Bytes || (!Offset && SrcReg != DestReg)) {
-    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
-           "SP increment/decrement not 8-byte aligned");
+    assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
+           "SP increment/decrement not 16-byte aligned");
     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
     if (Bytes < 0) {
       Bytes = -Bytes;

diff  --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 9562269336d8..84aa53f2bece 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -135,10 +135,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// e.g. Tail Call, Thunk, or Function if none apply.
   Optional<std::string> OutliningStyle;
 
-  // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
-  // CalleeSavedStackSize) to the address of the frame record.
-  int CalleeSaveBaseToFrameRecordOffset = 0;
-
 public:
   AArch64FunctionInfo() = default;
 
@@ -342,13 +338,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
     TaggedBasePointerOffset = Offset;
   }
 
-  int getCalleeSaveBaseToFrameRecordOffset() const {
-    return CalleeSaveBaseToFrameRecordOffset;
-  }
-  void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
-    CalleeSaveBaseToFrameRecordOffset = Offset;
-  }
-
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
deleted file mode 100644
index 3b13dee29f06..000000000000
--- a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
-
-; The purpose of this test is to verify that frame pointer (x29)
-; is correctly setup in the presence of callee-saved floating
-; point registers.  The frame pointer should point to the frame
-; record, which is located 16 bytes above the end of the CSR
-; space when a single FP CSR is in use.
-define void @test1(i32) #26 {
-entry:
-  call void asm sideeffect "nop", "~{d8}"() #26
-  ret void
-}
-; CHECK-LABEL: test1:
-; CHECK:       str     d8, [sp, #-32]!
-; CHECK-NEXT:  stp     x29, x30, [sp, #16]
-; CHECK-NEXT:  add     x29, sp, #16
-; CHECK:       nop
-; CHECK:       ldp     x29, x30, [sp, #16]
-; CHECK-NEXT:  ldr     d8, [sp], #32
-; CHECK-NEXT:  ret
-
-attributes #26 = { nounwind }

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
deleted file mode 100644
index ab4af04401c5..000000000000
--- a/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s
-
----
-name: TestFrameRecordLocation
-tracksRegLiveness: true
-frameInfo:
-  isFrameAddressTaken: true
-body: |
-  bb.0:
-    $d8 = IMPLICIT_DEF
-    $d9 = IMPLICIT_DEF
-    $x19 = IMPLICIT_DEF
-    RET_ReallyLR
-
-# CHECK-LABEL: TestFrameRecordLocation
-
-# CHECK: stp d9, d8, [sp, #-48]!
-# CHECK: stp x29, x30, [sp, #16]
-# CHECK: str x19, [sp, #32]
-
-# CHECK: add x29, sp, #16
-
-# CHECK: .cfi_def_cfa w29, 32
-# CHECK: .cfi_offset w19, -16
-# CHECK: .cfi_offset w30, -24
-# CHECK: .cfi_offset w29, -32
-# CHECK: .cfi_offset b8, -40
-# CHECK: .cfi_offset b9, -48
-...

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
deleted file mode 100644
index 160eb2dd099b..000000000000
--- a/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
-
-; The purpose of this test is to construct a scenario where an odd number
-; of callee-saved GPRs as well as an odd number of callee-saved FPRs are
-; used. This caused the frame pointer to be aligned to a multiple of 8
-; on non-Darwin platforms, rather than a multiple of 16 as usual.
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-unknown-linux-gnu"
-
- at a = global i64 0, align 4
-
-
-define i64 @b() {
-entry:
-  %call = tail call i64 @d()
-  %0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16
-  %1 = ptrtoint i8* %0 to i64
-  store i64 %1, i64* @a, align 4
-  %call1 = call i64 @e()
-  %conv = sitofp i64 %call1 to float
-  %2 = load i64, i64* @a, align 4
-  %call2 = call i64 @f(i64 %2)
-  %conv3 = fptosi float %conv to i64
-  ret i64 %conv3
-}
-
-; CHECK-LABEL: b:
-; CHECK:       str     d8, [sp, #-32]!
-; CHECK-NEXT:  stp     x29, x30, [sp, #8]
-; CHECK-NEXT:  str     x19, [sp, #24]
-; CHECK-NEXT:  add     x29, sp, #8
-
-; CHECK:       sub     sp, x29, #8
-; CHECK-NEXT:  ldr     x19, [sp, #24]
-; CHECK-NEXT:  ldp     x29, x30, [sp, #8]
-; CHECK-NEXT:  ldr     d8, [sp], #32
-; CHECK-NEXT:  ret
-
-declare i64 @d()
-declare i64 @e()
-declare i64 @f(i64)