[llvm] 5987da8 - Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""

Tue Sep 1 12:29:14 PDT 2020

Author: Owen Anderson
Date: 2020-09-01T19:29:03Z
New Revision: 5987da8764b71cd59fec772323cc2003bd82da38

URL: https://github.com/llvm/llvm-project/commit/5987da8764b71cd59fec772323cc2003bd82da38
DIFF: https://github.com/llvm/llvm-project/commit/5987da8764b71cd59fec772323cc2003bd82da38.diff

LOG: Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""

This reverts commit bc9a29b9ee6ade4894252b1470977142c32b4602.

The reasoning that this patch was wrong was itself incorrect
(see discussion on llvm-commits). This patch does seem to be exposing
a latent SVE code generation bug on non-public tests, which should
not block a correctness fix for public, non-SVE use cases.

Added: 
    llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
    llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
    llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c6cc6e9e8471..751791bdb354 100644

--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1024,10 +1024,6 @@ static bool needsWinCFI(const MachineFunction &MF) {
          F.needsUnwindTableEntry();
 }
 
-static bool isTargetDarwin(const MachineFunction &MF) {
-  return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
-}
-
 static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
@@ -1185,7 +1181,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // For funclets the FP belongs to the containing function.
   if (!IsFunclet && HasFP) {
     // Only set up FP if we actually need to.
-    int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
+    int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
 
     if (CombineSPBump)
       FPOffset += AFI->getLocalStackSize();
@@ -1409,11 +1405,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   if (needsFrameMoves) {
-    const DataLayout &TD = MF.getDataLayout();
-    const int StackGrowth = isTargetDarwin(MF)
-                                ? (2 * -TD.getPointerSize(0))
-                                : -AFI->getCalleeSavedStackSize();
-    Register FramePtr = RegInfo->getFrameRegister(MF);
     // An example of the prologue:
     //
     //     .globl __foo
@@ -1481,10 +1472,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     //     .cfi_offset w28, -32
 
     if (HasFP) {
+      const int OffsetToFirstCalleeSaveFromFP =
+          AFI->getCalleeSaveBaseToFrameRecordOffset() -
+          AFI->getCalleeSavedStackSize();
+      Register FramePtr = RegInfo->getFrameRegister(MF);
+
       // Define the current CFA rule to use the provided FP.
       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
       unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
+          MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
@@ -1775,10 +1771,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // non-post-indexed loads for the restores if we aren't actually going to
   // be able to save any instructions.
   if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
-    int64_t OffsetToFrameRecord =
-        isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-                    {OffsetToFrameRecord, MVT::i8},
+                    {-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
   } else if (NumBytes)
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
@@ -1839,11 +1833,11 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   bool IsWin64 =
       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
-
   unsigned FixedObject =
       getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
-  unsigned FPAdjust = isTargetDarwin(MF)
-                        ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
+  int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
+  int64_t FPAdjust =
+      CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
   return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
 }
 
@@ -2231,6 +2225,14 @@ static void computeCalleeSaveRegisterPairs(
             (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
            "Offset out of bounds for LDP/STP immediate");
 
+    // Save the offset to frame record so that the FP register can point to the
+    // innermost frame record (spilled FP and LR registers).
+    if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
+                              RPI.Reg2 == AArch64::FP) ||
+                             (IsWindows && RPI.Reg1 == AArch64::FP &&
+                              RPI.Reg2 == AArch64::LR)))
+      AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
+
     RegPairs.push_back(RPI);
     if (RPI.isPaired())
       ++i;

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 86aacedebdfe..9e37d0292e7a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3442,8 +3442,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
 
   // First emit non-scalable frame offsets, or a simple 'mov'.
   if (Bytes || (!Offset && SrcReg != DestReg)) {
-    assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
-           "SP increment/decrement not 16-byte aligned");
+    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
+           "SP increment/decrement not 8-byte aligned");
     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
     if (Bytes < 0) {
       Bytes = -Bytes;

diff  --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 84aa53f2bece..9562269336d8 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -135,6 +135,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// e.g. Tail Call, Thunk, or Function if none apply.
   Optional<std::string> OutliningStyle;
 
+  // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
+  // CalleeSavedStackSize) to the address of the frame record.
+  int CalleeSaveBaseToFrameRecordOffset = 0;
+
 public:
   AArch64FunctionInfo() = default;
 
@@ -338,6 +342,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
     TaggedBasePointerOffset = Offset;
   }
 
+  int getCalleeSaveBaseToFrameRecordOffset() const {
+    return CalleeSaveBaseToFrameRecordOffset;
+  }
+  void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
+    CalleeSaveBaseToFrameRecordOffset = Offset;
+  }
+
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
new file mode 100644
index 000000000000..3b13dee29f06
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
@@ -0,0 +1,22 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
+
+; The purpose of this test is to verify that frame pointer (x29)
+; is correctly setup in the presence of callee-saved floating
+; point registers.  The frame pointer should point to the frame
+; record, which is located 16 bytes above the end of the CSR
+; space when a single FP CSR is in use.
+define void @test1(i32) #26 {
+entry:
+  call void asm sideeffect "nop", "~{d8}"() #26
+  ret void
+}
+; CHECK-LABEL: test1:
+; CHECK:       str     d8, [sp, #-32]!
+; CHECK-NEXT:  stp     x29, x30, [sp, #16]
+; CHECK-NEXT:  add     x29, sp, #16
+; CHECK:       nop
+; CHECK:       ldp     x29, x30, [sp, #16]
+; CHECK-NEXT:  ldr     d8, [sp], #32
+; CHECK-NEXT:  ret
+
+attributes #26 = { nounwind }

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
new file mode 100644
index 000000000000..ab4af04401c5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
@@ -0,0 +1,29 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s
+
+---
+name: TestFrameRecordLocation
+tracksRegLiveness: true
+frameInfo:
+  isFrameAddressTaken: true
+body: |
+  bb.0:
+    $d8 = IMPLICIT_DEF
+    $d9 = IMPLICIT_DEF
+    $x19 = IMPLICIT_DEF
+    RET_ReallyLR
+
+# CHECK-LABEL: TestFrameRecordLocation
+
+# CHECK: stp d9, d8, [sp, #-48]!
+# CHECK: stp x29, x30, [sp, #16]
+# CHECK: str x19, [sp, #32]
+
+# CHECK: add x29, sp, #16
+
+# CHECK: .cfi_def_cfa w29, 32
+# CHECK: .cfi_offset w19, -16
+# CHECK: .cfi_offset w30, -24
+# CHECK: .cfi_offset w29, -32
+# CHECK: .cfi_offset b8, -40
+# CHECK: .cfi_offset b9, -48
+...

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
new file mode 100644
index 000000000000..160eb2dd099b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; The purpose of this test is to construct a scenario where an odd number
+; of callee-saved GPRs as well as an odd number of callee-saved FPRs are
+; used. This caused the frame pointer to be aligned to a multiple of 8
+; on non-Darwin platforms, rather than a multiple of 16 as usual.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+ at a = global i64 0, align 4
+
+
+define i64 @b() {
+entry:
+  %call = tail call i64 @d()
+  %0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16
+  %1 = ptrtoint i8* %0 to i64
+  store i64 %1, i64* @a, align 4
+  %call1 = call i64 @e()
+  %conv = sitofp i64 %call1 to float
+  %2 = load i64, i64* @a, align 4
+  %call2 = call i64 @f(i64 %2)
+  %conv3 = fptosi float %conv to i64
+  ret i64 %conv3
+}
+
+; CHECK-LABEL: b:
+; CHECK:       str     d8, [sp, #-32]!
+; CHECK-NEXT:  stp     x29, x30, [sp, #8]
+; CHECK-NEXT:  str     x19, [sp, #24]
+; CHECK-NEXT:  add     x29, sp, #8
+
+; CHECK:       sub     sp, x29, #8
+; CHECK-NEXT:  ldr     x19, [sp, #24]
+; CHECK-NEXT:  ldp     x29, x30, [sp, #8]
+; CHECK-NEXT:  ldr     d8, [sp], #32
+; CHECK-NEXT:  ret
+
+declare i64 @d()
+declare i64 @e()
+declare i64 @f(i64)