[llvm] r309744 - [AArch64] Rewrite stack frame handling for win64 vararg functions
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 2 10:39:35 PDT 2017
Merged to 5.0 in r309843.
On Tue, Aug 1, 2017 at 2:13 PM, Martin Storsjo via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: mstorsjo
> Date: Tue Aug 1 14:13:54 2017
> New Revision: 309744
>
> URL: http://llvm.org/viewvc/llvm-project?rev=309744&view=rev
> Log:
> [AArch64] Rewrite stack frame handling for win64 vararg functions
>
> The previous attempt, which made do with a single offset in
> computeCalleeSaveRegisterPairs, wasn't quite enough: it only
> worked as long as CombineSPBump == true (since the offset would
> be adjusted later in fixupCalleeSaveRestoreStackOffset).
>
> Instead, include the size of the fixed stack area used for win64
> varargs in the calculations in emitPrologue/emitEpilogue. The stack
> mainly consists of three parts:
> - AFI->getLocalStackSize()
> - AFI->getCalleeSavedStackSize()
> - FixedObject
>
> Most of the places in the code which previously used the CSStackSize
> now use PrologueSaveSize instead, which is the sum of the latter
> two, while some cases which need exactly the middle one use
> AFI->getCalleeSavedStackSize() explicitly instead of a local variable.
>
> In addition to moving the offsetting into emitPrologue/emitEpilogue
> (which fixes functions with CombineSPBump == false), also set the
> frame pointer to point to the right location, i.e. where the frame
> pointer and link register are actually stored. Besides the
> prologue/epilogue, this also requires changes to resolveFrameIndexReference.
>
> Add tests for a function that keeps a frame pointer and another one
> that uses a VLA.
>
> Differential Revision: https://reviews.llvm.org/D35919
>
> Modified:
> llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
> llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=309744&r1=309743&r2=309744&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Aug 1 14:13:54 2017
> @@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(
> return;
> }
>
> - auto CSStackSize = AFI->getCalleeSavedStackSize();
> + bool IsWin64 =
> + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> + unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
> +
> + auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
> // All of the remaining stack allocations are for locals.
> - AFI->setLocalStackSize(NumBytes - CSStackSize);
> + AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
>
> bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
> if (CombineSPBump) {
> emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
> MachineInstr::FrameSetup);
> NumBytes = 0;
> - } else if (CSStackSize != 0) {
> + } else if (PrologueSaveSize != 0) {
> MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
> - -CSStackSize);
> - NumBytes -= CSStackSize;
> + -PrologueSaveSize);
> + NumBytes -= PrologueSaveSize;
> }
> assert(NumBytes >= 0 && "Negative stack allocation size!?");
>
> @@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(
> ++MBBI;
> }
> if (HasFP) {
> - // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
> - int FPOffset = CSStackSize - 16;
> + // Only set up FP if we actually need to. Frame pointer is fp =
> + // sp - fixedobject - 16.
> + int FPOffset = AFI->getCalleeSavedStackSize() - 16;
> if (CombineSPBump)
> FPOffset += AFI->getLocalStackSize();
>
> @@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(
> if (HasFP) {
> // Define the current CFA rule to use the provided FP.
> unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
> - unsigned CFIIndex = MF.addFrameInst(
> - MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
> + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
> + nullptr, Reg, 2 * StackGrowth - FixedObject));
> BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
> .addCFIIndex(CFIIndex)
> .setMIFlags(MachineInstr::FrameSetup);
> @@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(
> // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
> // it as the 2nd argument of AArch64ISD::TC_RETURN.
>
> - auto CSStackSize = AFI->getCalleeSavedStackSize();
> + bool IsWin64 =
> + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> + unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
> +
> + auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
> bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
>
> - if (!CombineSPBump && CSStackSize != 0)
> + if (!CombineSPBump && PrologueSaveSize != 0)
> convertCalleeSaveRestoreToSPPrePostIncDec(
> - MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
> + MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
>
> // Move past the restores of the callee-saved registers.
> MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
> @@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(
> return;
> }
>
> - NumBytes -= CSStackSize;
> + NumBytes -= PrologueSaveSize;
> assert(NumBytes >= 0 && "Negative stack allocation size!?");
>
> if (!hasFP(MF)) {
> @@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(
> if (RedZone && ArgumentPopSize == 0)
> return;
>
> - bool NoCalleeSaveRestore = CSStackSize == 0;
> + bool NoCalleeSaveRestore = PrologueSaveSize == 0;
> int StackRestoreBytes = RedZone ? 0 : NumBytes;
> if (NoCalleeSaveRestore)
> StackRestoreBytes += ArgumentPopSize;
> @@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(
> // be able to save any instructions.
> if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
> emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
> - -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
> + -AFI->getCalleeSavedStackSize() + 16, TII,
> + MachineInstr::FrameDestroy);
> else if (NumBytes)
> emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
> MachineInstr::FrameDestroy);
> @@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIn
> const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
> MF.getSubtarget().getRegisterInfo());
> const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
> - int FPOffset = MFI.getObjectOffset(FI) + 16;
> + const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
> + bool IsWin64 =
> + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> + unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
> + int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
> int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
> bool isFixed = MFI.isFixedObjectIndex(FI);
>
> @@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPai
> "Odd number of callee-saved regs to spill!");
> int Offset = AFI->getCalleeSavedStackSize();
>
> - unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
> - const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
> - bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
> - if (IsWin64)
> - Offset -= alignTo(GPRSaveSize, 16);
> -
> for (unsigned i = 0; i < Count; ++i) {
> RegPairInfo RPI;
> RPI.Reg1 = CSI[i].getReg();
>
> Modified: llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll?rev=309744&r1=309743&r2=309744&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll Tue Aug 1 14:13:54 2017
> @@ -2,14 +2,14 @@
>
> define win64cc void @pass_va(i32 %count, ...) nounwind {
> entry:
> -; CHECK: sub sp, sp, #80
> +; CHECK: str x30, [sp, #-80]!
> ; CHECK: add x8, sp, #24
> ; CHECK: add x0, sp, #24
> ; CHECK: stp x6, x7, [sp, #64]
> ; CHECK: stp x4, x5, [sp, #48]
> ; CHECK: stp x2, x3, [sp, #32]
> ; CHECK: str x1, [sp, #24]
> -; CHECK: stp x30, x8, [sp]
> +; CHECK: str x8, [sp, #8]
> ; CHECK: bl other_func
> ; CHECK: ldr x30, [sp], #80
> ; CHECK: ret
>
> Modified: llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll?rev=309744&r1=309743&r2=309744&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/win64_vararg.ll Tue Aug 1 14:13:54 2017
> @@ -2,14 +2,14 @@
>
> define void @pass_va(i32 %count, ...) nounwind {
> entry:
> -; CHECK: sub sp, sp, #80
> +; CHECK: str x30, [sp, #-80]!
> ; CHECK: add x8, sp, #24
> ; CHECK: add x0, sp, #24
> ; CHECK: stp x6, x7, [sp, #64]
> ; CHECK: stp x4, x5, [sp, #48]
> ; CHECK: stp x2, x3, [sp, #32]
> ; CHECK: str x1, [sp, #24]
> -; CHECK: stp x30, x8, [sp]
> +; CHECK: str x8, [sp, #8]
> ; CHECK: bl other_func
> ; CHECK: ldr x30, [sp], #80
> ; CHECK: ret
> @@ -102,6 +102,113 @@ declare void @llvm.lifetime.end.p0i8(i64
> declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed_addr #3
> declare i64* @__local_stdio_printf_options() local_unnamed_addr #4
>
> +; CHECK-LABEL: fp
> +; CHECK: str x21, [sp, #-96]!
> +; CHECK: stp x20, x19, [sp, #16]
> +; CHECK: stp x29, x30, [sp, #32]
> +; CHECK: add x29, sp, #32
> +; CHECK: add x8, x29, #24
> +; CHECK: mov x19, x2
> +; CHECK: mov x20, x1
> +; CHECK: mov x21, x0
> +; CHECK: stp x6, x7, [x29, #48]
> +; CHECK: stp x4, x5, [x29, #32]
> +; CHECK: str x3, [x29, #24]
> +; CHECK: str x8, [sp, #8]
> +; CHECK: bl __local_stdio_printf_options
> +; CHECK: ldr x8, [x0]
> +; CHECK: add x5, x29, #24
> +; CHECK: mov x1, x21
> +; CHECK: mov x2, x20
> +; CHECK: orr x0, x8, #0x2
> +; CHECK: mov x3, x19
> +; CHECK: mov x4, xzr
> +; CHECK: bl __stdio_common_vsprintf
> +; CHECK: ldp x29, x30, [sp, #32]
> +; CHECK: ldp x20, x19, [sp, #16]
> +; CHECK: cmp w0, #0
> +; CHECK: csinv w0, w0, wzr, ge
> +; CHECK: ldr x21, [sp], #96
> +; CHECK: ret
> +define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 {
> + %4 = alloca i8*, align 8
> + %5 = bitcast i8** %4 to i8*
> + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #2
> + call void @llvm.va_start(i8* nonnull %5)
> + %6 = load i8*, i8** %4, align 8
> + %7 = call i64* @__local_stdio_printf_options() #2
> + %8 = load i64, i64* %7, align 8
> + %9 = or i64 %8, 2
> + %10 = call i32 @__stdio_common_vsprintf(i64 %9, i8* %0, i64 %1, i8* %2, i8* null, i8* %6) #2
> + %11 = icmp sgt i32 %10, -1
> + %12 = select i1 %11, i32 %10, i32 -1
> + call void @llvm.va_end(i8* nonnull %5)
> + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #2
> + ret i32 %12
> +}
> +
> +attributes #6 = { "no-frame-pointer-elim"="true" }
> +
> +; CHECK-LABEL: vla
> +; CHECK: str x23, [sp, #-112]!
> +; CHECK: stp x22, x21, [sp, #16]
> +; CHECK: stp x20, x19, [sp, #32]
> +; CHECK: stp x29, x30, [sp, #48]
> +; CHECK: add x29, sp, #48
> +; CHECK: add x8, x29, #16
> +; CHECK: stur x8, [x29, #-40]
> +; CHECK: mov w8, w0
> +; CHECK: add x8, x8, #15
> +; CHECK: mov x9, sp
> +; CHECK: and x8, x8, #0x1fffffff0
> +; CHECK: sub x20, x9, x8
> +; CHECK: mov x19, x1
> +; CHECK: mov x23, sp
> +; CHECK: stp x6, x7, [x29, #48]
> +; CHECK: stp x4, x5, [x29, #32]
> +; CHECK: stp x2, x3, [x29, #16]
> +; CHECK: mov sp, x20
> +; CHECK: ldur x21, [x29, #-40]
> +; CHECK: sxtw x22, w0
> +; CHECK: bl __local_stdio_printf_options
> +; CHECK: ldr x8, [x0]
> +; CHECK: mov x1, x20
> +; CHECK: mov x2, x22
> +; CHECK: mov x3, x19
> +; CHECK: orr x0, x8, #0x2
> +; CHECK: mov x4, xzr
> +; CHECK: mov x5, x21
> +; CHECK: bl __stdio_common_vsprintf
> +; CHECK: mov sp, x23
> +; CHECK: sub sp, x29, #48
> +; CHECK: ldp x29, x30, [sp, #48]
> +; CHECK: ldp x20, x19, [sp, #32]
> +; CHECK: ldp x22, x21, [sp, #16]
> +; CHECK: ldr x23, [sp], #112
> +; CHECK: ret
> +define void @vla(i32, i8*, ...) local_unnamed_addr {
> + %3 = alloca i8*, align 8
> + %4 = bitcast i8** %3 to i8*
> + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #5
> + call void @llvm.va_start(i8* nonnull %4)
> + %5 = zext i32 %0 to i64
> + %6 = call i8* @llvm.stacksave()
> + %7 = alloca i8, i64 %5, align 1
> + %8 = load i8*, i8** %3, align 8
> + %9 = sext i32 %0 to i64
> + %10 = call i64* @__local_stdio_printf_options()
> + %11 = load i64, i64* %10, align 8
> + %12 = or i64 %11, 2
> + %13 = call i32 @__stdio_common_vsprintf(i64 %12, i8* nonnull %7, i64 %9, i8* %1, i8* null, i8* %8)
> + call void @llvm.va_end(i8* nonnull %4)
> + call void @llvm.stackrestore(i8* %6)
> + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #5
> + ret void
> +}
> +
> +declare i8* @llvm.stacksave()
> +declare void @llvm.stackrestore(i8*)
> +
> ; CHECK-LABEL: snprintf
> ; CHECK: sub sp, sp, #96
> ; CHECK: stp x21, x20, [sp, #16]
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list