[llvm] r309744 - [AArch64] Rewrite stack frame handling for win64 vararg functions

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 2 10:39:35 PDT 2017


Merged to 5.0 in r309843.

On Tue, Aug 1, 2017 at 2:13 PM, Martin Storsjo via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: mstorsjo
> Date: Tue Aug  1 14:13:54 2017
> New Revision: 309744
>
> URL: http://llvm.org/viewvc/llvm-project?rev=309744&view=rev
> Log:
> [AArch64] Rewrite stack frame handling for win64 vararg functions
>
> The previous attempt, which made do with a single offset in
> computeCalleeSaveRegisterPairs, wasn't quite enough; it only worked
> as long as CombineSPBump == true (since the offset would be adjusted
> later in fixupCalleeSaveRestoreStackOffset).
>
> Instead, include the size of the fixed stack area used for win64
> varargs in the calculations in emitPrologue/emitEpilogue. The stack
> consists mainly of three parts:
> - AFI->getLocalStackSize()
> - AFI->getCalleeSavedStackSize()
> - FixedObject
>
> Most of the places in the code that previously used CSStackSize now
> use PrologueSaveSize instead, which is the sum of the latter two,
> while some cases that need exactly the middle one use
> AFI->getCalleeSavedStackSize() explicitly instead of a local variable.
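
To make the bookkeeping concrete, here's a minimal standalone C++ sketch of
the size accounting as I read it from the patch. The helper and parameter
names are invented for illustration; this is not the actual LLVM code, just
the arithmetic it performs:

  #include <cassert>
  #include <cstdint>

  // Round Size up to the next multiple of 16, mirroring what llvm::alignTo
  // is used for in the patch.
  static uint64_t alignTo16(uint64_t Size) {
    return (Size + 15) & ~uint64_t(15);
  }

  struct FrameSizes {
    uint64_t FixedObject;      // win64 vararg GPR save area, 16-byte aligned
    uint64_t PrologueSaveSize; // callee-saves plus FixedObject
    uint64_t LocalStackSize;   // everything else
  };

  // StackSize plays the role of NumBytes at the top of emitPrologue.
  static FrameSizes computeFrameSizes(uint64_t StackSize,
                                      uint64_t CalleeSavedStackSize,
                                      uint64_t VarArgsGPRSize, bool IsWin64) {
    FrameSizes S;
    S.FixedObject = IsWin64 ? alignTo16(VarArgsGPRSize) : 0;
    S.PrologueSaveSize = CalleeSavedStackSize + S.FixedObject;
    assert(StackSize >= S.PrologueSaveSize && "frame smaller than save area");
    S.LocalStackSize = StackSize - S.PrologueSaveSize;
    return S;
  }

The prologue then either bumps SP by the whole StackSize at once (the
CombineSPBump case) or pre-decrements SP by PrologueSaveSize as part of the
first callee-save store and allocates the locals separately.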
>
> In addition to moving the offsetting into emitPrologue/emitEpilogue
> (which fixes functions with CombineSPBump == false), also set the
> frame pointer to point to the right location, where the frame pointer
> and link register are actually stored. Besides the prologue/epilogue,
> this also requires changes to resolveFrameIndexReference.
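
The two offset formulas that change are roughly the following; again the
standalone helpers below are invented for illustration and only mirror the
arithmetic, not the real LLVM interfaces:

  #include <cstdint>

  // Offset from SP (after the prologue-save bump) at which FP is set up, so
  // that relative to the incoming stack pointer fp = sp - FixedObject - 16,
  // i.e. FP points at the stored FP/LR pair rather than at the vararg area.
  static int64_t framePointerSetupOffset(int64_t CalleeSavedStackSize,
                                         int64_t LocalStackSize,
                                         bool CombineSPBump) {
    int64_t FPOffset = CalleeSavedStackSize - 16;
    if (CombineSPBump) // the single SP adjustment already covered the locals
      FPOffset += LocalStackSize;
    return FPOffset;
  }

  // FP-relative offset of a frame index, mirroring the new formula in
  // resolveFrameIndexReference: the fixed vararg save area now sits between
  // the saved FP/LR pair and the incoming stack arguments.
  static int64_t frameIndexFPOffset(int64_t ObjectOffset, int64_t FixedObject) {
    return ObjectOffset + FixedObject + 16;
  }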
>
> Add tests for a function that keeps a frame pointer and another one
> that uses a VLA.
>
> Differential Revision: https://reviews.llvm.org/D35919
>
> Modified:
>     llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
>     llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
>     llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=309744&r1=309743&r2=309744&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Aug  1 14:13:54 2017
> @@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(
>      return;
>    }
>
> -  auto CSStackSize = AFI->getCalleeSavedStackSize();
> +  bool IsWin64 =
> +      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> +  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
> +
> +  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
>    // All of the remaining stack allocations are for locals.
> -  AFI->setLocalStackSize(NumBytes - CSStackSize);
> +  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
>
>    bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
>    if (CombineSPBump) {
>      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
>                      MachineInstr::FrameSetup);
>      NumBytes = 0;
> -  } else if (CSStackSize != 0) {
> +  } else if (PrologueSaveSize != 0) {
>      MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
> -                                                     -CSStackSize);
> -    NumBytes -= CSStackSize;
> +                                                     -PrologueSaveSize);
> +    NumBytes -= PrologueSaveSize;
>    }
>    assert(NumBytes >= 0 && "Negative stack allocation size!?");
>
> @@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(
>      ++MBBI;
>    }
>    if (HasFP) {
> -    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
> -    int FPOffset = CSStackSize - 16;
> +    // Only set up FP if we actually need to. Frame pointer is fp =
> +    // sp - fixedobject - 16.
> +    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
>      if (CombineSPBump)
>        FPOffset += AFI->getLocalStackSize();
>
> @@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(
>      if (HasFP) {
>        // Define the current CFA rule to use the provided FP.
>        unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
> -      unsigned CFIIndex = MF.addFrameInst(
> -          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
> +      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
> +          nullptr, Reg, 2 * StackGrowth - FixedObject));
>        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
>            .addCFIIndex(CFIIndex)
>            .setMIFlags(MachineInstr::FrameSetup);
> @@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(
>    // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
>    // it as the 2nd argument of AArch64ISD::TC_RETURN.
>
> -  auto CSStackSize = AFI->getCalleeSavedStackSize();
> +  bool IsWin64 =
> +      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> +  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
> +
> +  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
>    bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
>
> -  if (!CombineSPBump && CSStackSize != 0)
> +  if (!CombineSPBump && PrologueSaveSize != 0)
>      convertCalleeSaveRestoreToSPPrePostIncDec(
> -        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
> +        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
>
>    // Move past the restores of the callee-saved registers.
>    MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
> @@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(
>      return;
>    }
>
> -  NumBytes -= CSStackSize;
> +  NumBytes -= PrologueSaveSize;
>    assert(NumBytes >= 0 && "Negative stack allocation size!?");
>
>    if (!hasFP(MF)) {
> @@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(
>      if (RedZone && ArgumentPopSize == 0)
>        return;
>
> -    bool NoCalleeSaveRestore = CSStackSize == 0;
> +    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
>      int StackRestoreBytes = RedZone ? 0 : NumBytes;
>      if (NoCalleeSaveRestore)
>        StackRestoreBytes += ArgumentPopSize;
> @@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(
>    // be able to save any instructions.
>    if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
>      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
> -                    -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
> +                    -AFI->getCalleeSavedStackSize() + 16, TII,
> +                    MachineInstr::FrameDestroy);
>    else if (NumBytes)
>      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
>                      MachineInstr::FrameDestroy);
> @@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIn
>    const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
>        MF.getSubtarget().getRegisterInfo());
>    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
> -  int FPOffset = MFI.getObjectOffset(FI) + 16;
> +  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
> +  bool IsWin64 =
> +      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> +  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
> +  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
>    int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
>    bool isFixed = MFI.isFixedObjectIndex(FI);
>
> @@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPai
>           "Odd number of callee-saved regs to spill!");
>    int Offset = AFI->getCalleeSavedStackSize();
>
> -  unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
> -  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
> -  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> -  if (IsWin64)
> -    Offset -= alignTo(GPRSaveSize, 16);
> -
>    for (unsigned i = 0; i < Count; ++i) {
>      RegPairInfo RPI;
>      RPI.Reg1 = CSI[i].getReg();
>
> Modified: llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll?rev=309744&r1=309743&r2=309744&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll Tue Aug  1 14:13:54 2017
> @@ -2,14 +2,14 @@
>
>  define win64cc void @pass_va(i32 %count, ...) nounwind {
>  entry:
> -; CHECK: sub     sp, sp, #80
> +; CHECK: str     x30, [sp, #-80]!
>  ; CHECK: add     x8, sp, #24
>  ; CHECK: add     x0, sp, #24
>  ; CHECK: stp     x6, x7, [sp, #64]
>  ; CHECK: stp     x4, x5, [sp, #48]
>  ; CHECK: stp     x2, x3, [sp, #32]
>  ; CHECK: str     x1, [sp, #24]
> -; CHECK: stp     x30, x8, [sp]
> +; CHECK: str     x8, [sp, #8]
>  ; CHECK: bl      other_func
>  ; CHECK: ldr     x30, [sp], #80
>  ; CHECK: ret
>
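
For what it's worth, the new 80-byte allocation seems to line up with the
accounting above: with %count in x0, the seven remaining vararg GPRs x1-x7
need 56 bytes, which rounds up to a 64-byte FixedObject, and the sole
callee-save (x30) adds another 16, so PrologueSaveSize = 80 and the whole
area is now allocated and freed by the single pre-indexed str/ldr of x30.
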
> Modified: llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll?rev=309744&r1=309743&r2=309744&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll Tue Aug  1 14:13:54 2017
> @@ -2,14 +2,14 @@
>
>  define void @pass_va(i32 %count, ...) nounwind {
>  entry:
> -; CHECK: sub     sp, sp, #80
> +; CHECK: str     x30, [sp, #-80]!
>  ; CHECK: add     x8, sp, #24
>  ; CHECK: add     x0, sp, #24
>  ; CHECK: stp     x6, x7, [sp, #64]
>  ; CHECK: stp     x4, x5, [sp, #48]
>  ; CHECK: stp     x2, x3, [sp, #32]
>  ; CHECK: str     x1, [sp, #24]
> -; CHECK: stp     x30, x8, [sp]
> +; CHECK: str     x8, [sp, #8]
>  ; CHECK: bl      other_func
>  ; CHECK: ldr     x30, [sp], #80
>  ; CHECK: ret
> @@ -102,6 +102,113 @@ declare void @llvm.lifetime.end.p0i8(i64
>  declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed_addr #3
>  declare i64* @__local_stdio_printf_options() local_unnamed_addr #4
>
> +; CHECK-LABEL: fp
> +; CHECK: str     x21, [sp, #-96]!
> +; CHECK: stp     x20, x19, [sp, #16]
> +; CHECK: stp     x29, x30, [sp, #32]
> +; CHECK: add     x29, sp, #32
> +; CHECK: add     x8, x29, #24
> +; CHECK: mov     x19, x2
> +; CHECK: mov     x20, x1
> +; CHECK: mov     x21, x0
> +; CHECK: stp     x6, x7, [x29, #48]
> +; CHECK: stp     x4, x5, [x29, #32]
> +; CHECK: str     x3, [x29, #24]
> +; CHECK: str     x8, [sp, #8]
> +; CHECK: bl      __local_stdio_printf_options
> +; CHECK: ldr     x8, [x0]
> +; CHECK: add     x5, x29, #24
> +; CHECK: mov     x1, x21
> +; CHECK: mov     x2, x20
> +; CHECK: orr     x0, x8, #0x2
> +; CHECK: mov     x3, x19
> +; CHECK: mov     x4, xzr
> +; CHECK: bl      __stdio_common_vsprintf
> +; CHECK: ldp     x29, x30, [sp, #32]
> +; CHECK: ldp     x20, x19, [sp, #16]
> +; CHECK: cmp     w0, #0
> +; CHECK: csinv   w0, w0, wzr, ge
> +; CHECK: ldr     x21, [sp], #96
> +; CHECK: ret
> +define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 {
> +  %4 = alloca i8*, align 8
> +  %5 = bitcast i8** %4 to i8*
> +  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #2
> +  call void @llvm.va_start(i8* nonnull %5)
> +  %6 = load i8*, i8** %4, align 8
> +  %7 = call i64* @__local_stdio_printf_options() #2
> +  %8 = load i64, i64* %7, align 8
> +  %9 = or i64 %8, 2
> +  %10 = call i32 @__stdio_common_vsprintf(i64 %9, i8* %0, i64 %1, i8* %2, i8* null, i8* %6) #2
> +  %11 = icmp sgt i32 %10, -1
> +  %12 = select i1 %11, i32 %10, i32 -1
> +  call void @llvm.va_end(i8* nonnull %5)
> +  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #2
> +  ret i32 %12
> +}
> +
> +attributes #6 = { "no-frame-pointer-elim"="true" }
> +
> +; CHECK-LABEL: vla
> +; CHECK: str     x23, [sp, #-112]!
> +; CHECK: stp     x22, x21, [sp, #16]
> +; CHECK: stp     x20, x19, [sp, #32]
> +; CHECK: stp     x29, x30, [sp, #48]
> +; CHECK: add     x29, sp, #48
> +; CHECK: add     x8, x29, #16
> +; CHECK: stur    x8, [x29, #-40]
> +; CHECK: mov     w8, w0
> +; CHECK: add     x8, x8, #15
> +; CHECK: mov     x9, sp
> +; CHECK: and     x8, x8, #0x1fffffff0
> +; CHECK: sub     x20, x9, x8
> +; CHECK: mov     x19, x1
> +; CHECK: mov     x23, sp
> +; CHECK: stp     x6, x7, [x29, #48]
> +; CHECK: stp     x4, x5, [x29, #32]
> +; CHECK: stp     x2, x3, [x29, #16]
> +; CHECK: mov     sp, x20
> +; CHECK: ldur    x21, [x29, #-40]
> +; CHECK: sxtw    x22, w0
> +; CHECK: bl      __local_stdio_printf_options
> +; CHECK: ldr     x8, [x0]
> +; CHECK: mov     x1, x20
> +; CHECK: mov     x2, x22
> +; CHECK: mov     x3, x19
> +; CHECK: orr     x0, x8, #0x2
> +; CHECK: mov     x4, xzr
> +; CHECK: mov     x5, x21
> +; CHECK: bl      __stdio_common_vsprintf
> +; CHECK: mov     sp, x23
> +; CHECK: sub     sp, x29, #48
> +; CHECK: ldp     x29, x30, [sp, #48]
> +; CHECK: ldp     x20, x19, [sp, #32]
> +; CHECK: ldp     x22, x21, [sp, #16]
> +; CHECK: ldr     x23, [sp], #112
> +; CHECK: ret
> +define void @vla(i32, i8*, ...) local_unnamed_addr {
> +  %3 = alloca i8*, align 8
> +  %4 = bitcast i8** %3 to i8*
> +  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #5
> +  call void @llvm.va_start(i8* nonnull %4)
> +  %5 = zext i32 %0 to i64
> +  %6 = call i8* @llvm.stacksave()
> +  %7 = alloca i8, i64 %5, align 1
> +  %8 = load i8*, i8** %3, align 8
> +  %9 = sext i32 %0 to i64
> +  %10 = call i64* @__local_stdio_printf_options()
> +  %11 = load i64, i64* %10, align 8
> +  %12 = or i64 %11, 2
> +  %13 = call i32 @__stdio_common_vsprintf(i64 %12, i8* nonnull %7, i64 %9, i8* %1, i8* null, i8* %8)
> +  call void @llvm.va_end(i8* nonnull %4)
> +  call void @llvm.stackrestore(i8* %6)
> +  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #5
> +  ret void
> +}
> +
> +declare i8* @llvm.stacksave()
> +declare void @llvm.stackrestore(i8*)
> +
>  ; CHECK-LABEL: snprintf
>  ; CHECK: sub     sp,  sp, #96
>  ; CHECK: stp     x21, x20, [sp, #16]
>
>

