[llvm] r309744 - [AArch64] Rewrite stack frame handling for win64 vararg functions

Martin Storsjo via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 14:13:54 PDT 2017


Author: mstorsjo
Date: Tue Aug  1 14:13:54 2017
New Revision: 309744

URL: http://llvm.org/viewvc/llvm-project?rev=309744&view=rev
Log:
[AArch64] Rewrite stack frame handling for win64 vararg functions

The previous attempt, which made do with a single offset in
computeCalleeSaveRegisterPairs, wasn't quite enough. The previous
attempt only worked as long as CombineSPBump == true (since the
offset would be adjusted later in fixupCalleeSaveRestoreStackOffset).

Instead, include the size of the fixed stack area used for win64
varargs in the calculations in emitPrologue/emitEpilogue. The stack
frame mainly consists of three parts:
- AFI->getLocalStackSize()
- AFI->getCalleeSavedStackSize()
- FixedObject

Most of the places in the code which previously used the CSStackSize
now use PrologueSaveSize instead, which is the sum of the latter
two, while some cases which need exactly the middle one use
AFI->getCalleeSavedStackSize() explicitly instead of a local variable.

In addition to moving the offsetting into emitPrologue/emitEpilogue
(which fixes functions with CombineSPBump == false), also set the
frame pointer to point to the right location, where the frame pointer
and link register actually are stored. In addition to the prologue/epilogue,
this also requires changes to resolveFrameIndexReference.

Add tests for a function that keeps a frame pointer and another one
that uses a VLA.

Differential Revision: https://reviews.llvm.org/D35919

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
    llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=309744&r1=309743&r2=309744&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Aug  1 14:13:54 2017
@@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(
     return;
   }
 
-  auto CSStackSize = AFI->getCalleeSavedStackSize();
+  bool IsWin64 =
+      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+
+  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
   // All of the remaining stack allocations are for locals.
-  AFI->setLocalStackSize(NumBytes - CSStackSize);
+  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
 
   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
   if (CombineSPBump) {
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                     MachineInstr::FrameSetup);
     NumBytes = 0;
-  } else if (CSStackSize != 0) {
+  } else if (PrologueSaveSize != 0) {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
-                                                     -CSStackSize);
-    NumBytes -= CSStackSize;
+                                                     -PrologueSaveSize);
+    NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
 
@@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(
     ++MBBI;
   }
   if (HasFP) {
-    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
-    int FPOffset = CSStackSize - 16;
+    // Only set up FP if we actually need to. Frame pointer is fp =
+    // sp - fixedobject - 16.
+    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
     if (CombineSPBump)
       FPOffset += AFI->getLocalStackSize();
 
@@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(
     if (HasFP) {
       // Define the current CFA rule to use the provided FP.
       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
-      unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+          nullptr, Reg, 2 * StackGrowth - FixedObject));
       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
@@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(
   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
   // it as the 2nd argument of AArch64ISD::TC_RETURN.
 
-  auto CSStackSize = AFI->getCalleeSavedStackSize();
+  bool IsWin64 =
+      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+
+  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
 
-  if (!CombineSPBump && CSStackSize != 0)
+  if (!CombineSPBump && PrologueSaveSize != 0)
     convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
+        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
 
   // Move past the restores of the callee-saved registers.
   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
@@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(
     return;
   }
 
-  NumBytes -= CSStackSize;
+  NumBytes -= PrologueSaveSize;
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
 
   if (!hasFP(MF)) {
@@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(
     if (RedZone && ArgumentPopSize == 0)
       return;
 
-    bool NoCalleeSaveRestore = CSStackSize == 0;
+    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
     int StackRestoreBytes = RedZone ? 0 : NumBytes;
     if (NoCalleeSaveRestore)
       StackRestoreBytes += ArgumentPopSize;
@@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(
   // be able to save any instructions.
   if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-                    -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
+                    -AFI->getCalleeSavedStackSize() + 16, TII,
+                    MachineInstr::FrameDestroy);
   else if (NumBytes)
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                     MachineInstr::FrameDestroy);
@@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIn
   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  int FPOffset = MFI.getObjectOffset(FI) + 16;
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  bool IsWin64 =
+      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
   int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
   bool isFixed = MFI.isFixedObjectIndex(FI);
 
@@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPai
          "Odd number of callee-saved regs to spill!");
   int Offset = AFI->getCalleeSavedStackSize();
 
-  unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
-  if (IsWin64)
-    Offset -= alignTo(GPRSaveSize, 16);
-
   for (unsigned i = 0; i < Count; ++i) {
     RegPairInfo RPI;
     RPI.Reg1 = CSI[i].getReg();

Modified: llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll?rev=309744&r1=309743&r2=309744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll Tue Aug  1 14:13:54 2017
@@ -2,14 +2,14 @@
 
 define win64cc void @pass_va(i32 %count, ...) nounwind {
 entry:
-; CHECK: sub     sp, sp, #80
+; CHECK: str     x30, [sp, #-80]!
 ; CHECK: add     x8, sp, #24
 ; CHECK: add     x0, sp, #24
 ; CHECK: stp     x6, x7, [sp, #64]
 ; CHECK: stp     x4, x5, [sp, #48]
 ; CHECK: stp     x2, x3, [sp, #32]
 ; CHECK: str     x1, [sp, #24]
-; CHECK: stp     x30, x8, [sp]
+; CHECK: str     x8, [sp, #8]
 ; CHECK: bl      other_func
 ; CHECK: ldr     x30, [sp], #80
 ; CHECK: ret

Modified: llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll?rev=309744&r1=309743&r2=309744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll Tue Aug  1 14:13:54 2017
@@ -2,14 +2,14 @@
 
 define void @pass_va(i32 %count, ...) nounwind {
 entry:
-; CHECK: sub     sp, sp, #80
+; CHECK: str     x30, [sp, #-80]!
 ; CHECK: add     x8, sp, #24
 ; CHECK: add     x0, sp, #24
 ; CHECK: stp     x6, x7, [sp, #64]
 ; CHECK: stp     x4, x5, [sp, #48]
 ; CHECK: stp     x2, x3, [sp, #32]
 ; CHECK: str     x1, [sp, #24]
-; CHECK: stp     x30, x8, [sp]
+; CHECK: str     x8, [sp, #8]
 ; CHECK: bl      other_func
 ; CHECK: ldr     x30, [sp], #80
 ; CHECK: ret
@@ -102,6 +102,113 @@ declare void @llvm.lifetime.end.p0i8(i64
 declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed_addr #3
 declare i64* @__local_stdio_printf_options() local_unnamed_addr #4
 
+; CHECK-LABEL: fp
+; CHECK: str     x21, [sp, #-96]!
+; CHECK: stp     x20, x19, [sp, #16]
+; CHECK: stp     x29, x30, [sp, #32]
+; CHECK: add     x29, sp, #32
+; CHECK: add     x8, x29, #24
+; CHECK: mov     x19, x2
+; CHECK: mov     x20, x1
+; CHECK: mov     x21, x0
+; CHECK: stp     x6, x7, [x29, #48]
+; CHECK: stp     x4, x5, [x29, #32]
+; CHECK: str     x3, [x29, #24]
+; CHECK: str     x8, [sp, #8]
+; CHECK: bl      __local_stdio_printf_options
+; CHECK: ldr     x8, [x0]
+; CHECK: add     x5, x29, #24
+; CHECK: mov     x1, x21
+; CHECK: mov     x2, x20
+; CHECK: orr     x0, x8, #0x2
+; CHECK: mov     x3, x19
+; CHECK: mov     x4, xzr
+; CHECK: bl      __stdio_common_vsprintf
+; CHECK: ldp     x29, x30, [sp, #32]
+; CHECK: ldp     x20, x19, [sp, #16]
+; CHECK: cmp     w0, #0
+; CHECK: csinv   w0, w0, wzr, ge
+; CHECK: ldr     x21, [sp], #96
+; CHECK: ret
+define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 {
+  %4 = alloca i8*, align 8
+  %5 = bitcast i8** %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #2
+  call void @llvm.va_start(i8* nonnull %5)
+  %6 = load i8*, i8** %4, align 8
+  %7 = call i64* @__local_stdio_printf_options() #2
+  %8 = load i64, i64* %7, align 8
+  %9 = or i64 %8, 2
+  %10 = call i32 @__stdio_common_vsprintf(i64 %9, i8* %0, i64 %1, i8* %2, i8* null, i8* %6) #2
+  %11 = icmp sgt i32 %10, -1
+  %12 = select i1 %11, i32 %10, i32 -1
+  call void @llvm.va_end(i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #2
+  ret i32 %12
+}
+
+attributes #6 = { "no-frame-pointer-elim"="true" }
+
+; CHECK-LABEL: vla
+; CHECK: str     x23, [sp, #-112]!
+; CHECK: stp     x22, x21, [sp, #16]
+; CHECK: stp     x20, x19, [sp, #32]
+; CHECK: stp     x29, x30, [sp, #48]
+; CHECK: add     x29, sp, #48
+; CHECK: add     x8, x29, #16
+; CHECK: stur    x8, [x29, #-40]
+; CHECK: mov     w8, w0
+; CHECK: add     x8, x8, #15
+; CHECK: mov     x9, sp
+; CHECK: and     x8, x8, #0x1fffffff0
+; CHECK: sub     x20, x9, x8
+; CHECK: mov     x19, x1
+; CHECK: mov     x23, sp
+; CHECK: stp     x6, x7, [x29, #48]
+; CHECK: stp     x4, x5, [x29, #32]
+; CHECK: stp     x2, x3, [x29, #16]
+; CHECK: mov     sp, x20
+; CHECK: ldur    x21, [x29, #-40]
+; CHECK: sxtw    x22, w0
+; CHECK: bl      __local_stdio_printf_options
+; CHECK: ldr     x8, [x0]
+; CHECK: mov     x1, x20
+; CHECK: mov     x2, x22
+; CHECK: mov     x3, x19
+; CHECK: orr     x0, x8, #0x2
+; CHECK: mov     x4, xzr
+; CHECK: mov     x5, x21
+; CHECK: bl      __stdio_common_vsprintf
+; CHECK: mov     sp, x23
+; CHECK: sub     sp, x29, #48
+; CHECK: ldp     x29, x30, [sp, #48]
+; CHECK: ldp     x20, x19, [sp, #32]
+; CHECK: ldp     x22, x21, [sp, #16]
+; CHECK: ldr     x23, [sp], #112
+; CHECK: ret
+define void @vla(i32, i8*, ...) local_unnamed_addr {
+  %3 = alloca i8*, align 8
+  %4 = bitcast i8** %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #5
+  call void @llvm.va_start(i8* nonnull %4)
+  %5 = zext i32 %0 to i64
+  %6 = call i8* @llvm.stacksave()
+  %7 = alloca i8, i64 %5, align 1
+  %8 = load i8*, i8** %3, align 8
+  %9 = sext i32 %0 to i64
+  %10 = call i64* @__local_stdio_printf_options()
+  %11 = load i64, i64* %10, align 8
+  %12 = or i64 %11, 2
+  %13 = call i32 @__stdio_common_vsprintf(i64 %12, i8* nonnull %7, i64 %9, i8* %1, i8* null, i8* %8)
+  call void @llvm.va_end(i8* nonnull %4)
+  call void @llvm.stackrestore(i8* %6)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #5
+  ret void
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
 ; CHECK-LABEL: snprintf
 ; CHECK: sub     sp,  sp, #96
 ; CHECK: stp     x21, x20, [sp, #16]




More information about the llvm-commits mailing list