[llvm] r228641 - X86: Emit an ABI compliant prologue and epilogue for Win64

David Majnemer david.majnemer at gmail.com
Mon Feb 9 16:57:43 PST 2015


Author: majnemer
Date: Mon Feb  9 18:57:42 2015
New Revision: 228641

URL: http://llvm.org/viewvc/llvm-project?rev=228641&view=rev
Log:
X86: Emit an ABI compliant prologue and epilogue for Win64

Win64 has specific constraints on what valid prologues and epilogues look
like.  These constraints are born from the flexibility and descriptiveness
of Win64's unwind opcodes.

Prologues previously emitted by LLVM could not be represented by the
unwind opcodes, preventing operations powered by stack unwinding from
working successfully.

Differential Revision: http://reviews.llvm.org/D7520

Added:
    llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.s
    llvm/trunk/test/CodeGen/X86/win64_eh.s
    llvm/trunk/test/CodeGen/X86/win64_frame.ll
Modified:
    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
    llvm/trunk/test/CodeGen/X86/frameaddr.ll
    llvm/trunk/test/CodeGen/X86/gcc_except_table.ll
    llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.ll
    llvm/trunk/test/CodeGen/X86/win64_eh.ll

Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=228641&r1=228640&r2=228641&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Feb  9 18:57:42 2015
@@ -484,6 +484,35 @@ void X86FrameLowering::emitStackProbeCal
   }
 }
 
+static unsigned calculateSetFPREG(uint64_t SPAdjust) {
+  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
+  // and might require smaller successive adjustments.
+  const uint64_t Win64MaxSEHOffset = 128;
+  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
+  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
+  return static_cast<unsigned>(RoundUpToAlignment(SEHFrameOffset, 16));
+}
+
+// If we're forcing a stack realignment we can't rely on just the frame
+// info, we need to know the ABI stack alignment as well in case we
+// have a call out.  Otherwise just make sure we have some alignment - we'll
+// go with the minimum SlotSize.
+static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
+  unsigned SlotSize = RegInfo->getSlotSize();
+  unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
+  if (ForceStackAlign) {
+    if (MFI->hasCalls())
+      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+    else if (MaxAlign < SlotSize)
+      MaxAlign = SlotSize;
+  }
+  return MaxAlign;
+}
+
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
@@ -578,7 +607,7 @@ void X86FrameLowering::emitPrologue(Mach
   const TargetInstrInfo &TII = *STI.getInstrInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
+  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
   uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
   bool HasFP = hasFP(MF);
   bool Is64Bit = STI.is64Bit();
@@ -591,7 +620,6 @@ void X86FrameLowering::emitPrologue(Mach
   bool NeedsDwarfCFI =
       !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
   bool UseLEA = STI.useLeaForSP();
-  unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   const unsigned MachineFramePtr =
@@ -602,19 +630,11 @@ void X86FrameLowering::emitPrologue(Mach
   unsigned BasePtr = RegInfo->getBaseRegister();
   DebugLoc DL;
 
-  // If we're forcing a stack realignment we can't rely on just the frame
-  // info, we need to know the ABI stack alignment as well in case we
-  // have a call out.  Otherwise just make sure we have some alignment - we'll
-  // go with the minimum SlotSize.
-  if (ForceStackAlign) {
-    if (MFI->hasCalls())
-      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
-    else if (MaxAlign < SlotSize)
-      MaxAlign = SlotSize;
-  }
-
   // Add RETADDR move area to callee saved frame size.
   int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+  if (TailCallReturnAddrDelta && IsWinEH)
+    report_fatal_error("Can't handle guaranteed tail call under win64 yet");
+
   if (TailCallReturnAddrDelta < 0)
     X86FI->setCalleeSavedFrameSize(
       X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
@@ -689,7 +709,7 @@ void X86FrameLowering::emitPrologue(Mach
       // Callee-saved registers are pushed on stack before the stack
       // is realigned.
       FrameSize -= X86FI->getCalleeSavedFrameSize();
-      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
     } else {
       NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
     }
@@ -728,11 +748,14 @@ void X86FrameLowering::emitPrologue(Mach
           .setMIFlag(MachineInstr::FrameSetup);
     }
 
-    // Update EBP with the new base value.
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
-        .addReg(StackPtr)
-        .setMIFlag(MachineInstr::FrameSetup);
+    if (!IsWinEH) {
+      // Update EBP with the new base value.
+      BuildMI(MBB, MBBI, DL,
+              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+              FramePtr)
+          .addReg(StackPtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
 
     if (NeedsDwarfCFI) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -781,15 +804,16 @@ void X86FrameLowering::emitPrologue(Mach
 
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
-  if (RegInfo->needsStackRealignment(MF)) {
+  // Don't do this for Win64, it needs to realign the stack after the prologue.
+  if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
     assert(HasFP && "There should be a frame pointer if stack is realigned.");
     uint64_t Val = -MaxAlign;
     MachineInstr *MI =
-      BuildMI(MBB, MBBI, DL,
-              TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
-      .addReg(StackPtr)
-      .addImm(Val)
-      .setMIFlag(MachineInstr::FrameSetup);
+        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+                StackPtr)
+            .addReg(StackPtr)
+            .addImm(Val)
+            .setMIFlag(MachineInstr::FrameSetup);
 
     // The EFLAGS implicit def is dead.
     MI->getOperand(3).setIsDead();
@@ -867,50 +891,28 @@ void X86FrameLowering::emitPrologue(Mach
                  UseLEA, TII, *RegInfo);
   }
 
-  int SEHFrameOffset = 0;
-  if (NeedsWinEH) {
-    if (HasFP) {
-      // We need to set frame base offset low enough such that all saved
-      // register offsets would be positive relative to it, but we can't
-      // just use NumBytes, because .seh_setframe offset must be <=240.
-      // So we pretend to have only allocated enough space to spill the
-      // non-volatile registers.
-      // We don't care about the rest of stack allocation, because unwinder
-      // will restore SP to (BP - SEHFrameOffset)
-      for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
-        int offset = MFI->getObjectOffset(Info.getFrameIdx());
-        SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
-      }
-      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant
+  if (NeedsWinEH && NumBytes)
+    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+        .addImm(NumBytes)
+        .setMIFlag(MachineInstr::FrameSetup);
 
-      // This only needs to account for XMM spill slots, GPR slots
-      // are covered by the .seh_pushreg's emitted above.
-      unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
-      if (Size) {
-        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
-            .addImm(Size)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
+  int SEHFrameOffset = 0;
+  if (IsWinEH && HasFP) {
+    SEHFrameOffset = calculateSetFPREG(NumBytes);
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
+                 StackPtr, false, SEHFrameOffset);
 
+    if (NeedsWinEH)
       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
           .addImm(FramePtr)
           .addImm(SEHFrameOffset)
           .setMIFlag(MachineInstr::FrameSetup);
-    } else {
-      // SP will be the base register for restoring XMMs
-      if (NumBytes) {
-        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
-            .addImm(NumBytes)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-    }
   }
 
   // Skip the rest of register spilling code
   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
     ++MBBI;
 
-  // Emit SEH info for non-GPRs
   if (NeedsWinEH) {
     for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
       unsigned Reg = Info.getReg();
@@ -931,6 +933,23 @@ void X86FrameLowering::emitPrologue(Mach
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  // Realign stack after we spilled callee-saved registers (so that we'll be
+  // able to calculate their offsets from the frame pointer).
+  // Win64 requires aligning the stack after the prologue.
+  if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
+    assert(HasFP && "There should be a frame pointer if stack is realigned.");
+    uint64_t Val = -MaxAlign;
+    MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+                StackPtr)
+            .addReg(StackPtr)
+            .addImm(Val)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+    // The EFLAGS implicit def is dead.
+    MI->getOperand(3).setIsDead();
+  }
+
   // If we need a base pointer, set it up here. It's whatever the value
   // of the stack pointer is at this point. Any variable size objects
   // will be allocated after this, so we can still use the base pointer
@@ -986,7 +1005,6 @@ void X86FrameLowering::emitEpilogue(Mach
   const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
   const bool Is64BitILP32 = STI.isTarget64BitILP32();
   bool UseLEA = STI.useLeaForSP();
-  unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned MachineFramePtr =
@@ -1017,21 +1035,10 @@ void X86FrameLowering::emitEpilogue(Mach
 
   // Get the number of bytes to allocate from the FrameInfo.
   uint64_t StackSize = MFI->getStackSize();
-  uint64_t MaxAlign  = MFI->getMaxAlignment();
+  uint64_t MaxAlign = calculateMaxStackAlign(MF);
   unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   uint64_t NumBytes = 0;
 
-  // If we're forcing a stack realignment we can't rely on just the frame
-  // info, we need to know the ABI stack alignment as well in case we
-  // have a call out.  Otherwise just make sure we have some alignment - we'll
-  // go with the minimum.
-  if (ForceStackAlign) {
-    if (MFI->hasCalls())
-      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
-    else
-      MaxAlign = MaxAlign ? MaxAlign : 4;
-  }
-
   if (hasFP(MF)) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
@@ -1050,6 +1057,7 @@ void X86FrameLowering::emitEpilogue(Mach
   } else {
     NumBytes = StackSize - CSSize;
   }
+  uint64_t SEHStackAllocAmt = NumBytes;
 
   // Skip the callee-saved pop instructions.
   while (MBBI != MBB.begin()) {
@@ -1077,7 +1085,12 @@ void X86FrameLowering::emitEpilogue(Mach
   if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
     if (RegInfo->needsStackRealignment(MF))
       MBBI = FirstCSPop;
-    if (CSSize != 0) {
+    if (IsWinEH) {
+      unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), StackPtr),
+                   FramePtr, false, SEHStackAllocAmt - SEHFrameOffset);
+      --MBBI;
+    } else if (CSSize != 0) {
       unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                    FramePtr, false, -CSSize);
@@ -1195,14 +1208,53 @@ int X86FrameLowering::getFrameIndexOffse
   const X86RegisterInfo *RegInfo =
       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Offset will hold the offset from the stack pointer at function entry to the
+  // object.
+  // We need to factor in additional offsets applied during the prologue to the
+  // frame, base, and stack pointer depending on which is used.
   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   uint64_t StackSize = MFI->getStackSize();
+  unsigned SlotSize = RegInfo->getSlotSize();
+  bool HasFP = hasFP(MF);
+  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+  int64_t FPDelta = 0;
+
+  if (IsWinEH) {
+    uint64_t NumBytes = 0;
+    // Calculate required stack adjustment.
+    uint64_t FrameSize = StackSize - SlotSize;
+    // If required, include space for extra hidden slot for stashing base pointer.
+    if (X86FI->getRestoreBasePointer())
+      FrameSize += SlotSize;
+    uint64_t SEHStackAllocAmt = StackSize;
+    if (RegInfo->needsStackRealignment(MF)) {
+      // Callee-saved registers are pushed on stack before the stack
+      // is realigned.
+      FrameSize -= CSSize;
+
+      uint64_t MaxAlign =
+          calculateMaxStackAlign(MF); // Desired stack alignment.
+      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
+      SEHStackAllocAmt = RoundUpToAlignment(SEHStackAllocAmt, 16);
+    } else {
+      NumBytes = FrameSize - CSSize;
+    }
+    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
+    // FPDelta is the offset from the "traditional" FP location of the old base
+    // pointer followed by return address and the location required by the
+    // restricted Win64 prologue.
+    // Add FPDelta to all offsets below that go through the frame pointer.
+    FPDelta = SEHStackAllocAmt - SEHFrameOffset;
+  }
+
 
   if (RegInfo->hasBasePointer(MF)) {
-    assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
+    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
     if (FI < 0) {
       // Skip the saved EBP.
-      return Offset + RegInfo->getSlotSize();
+      return Offset + SlotSize + FPDelta;
     } else {
       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
       return Offset + StackSize;
@@ -1210,21 +1262,22 @@ int X86FrameLowering::getFrameIndexOffse
   } else if (RegInfo->needsStackRealignment(MF)) {
     if (FI < 0) {
       // Skip the saved EBP.
-      return Offset + RegInfo->getSlotSize();
+      return Offset + SlotSize + FPDelta;
     } else {
       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
       return Offset + StackSize;
     }
     // FIXME: Support tail calls
   } else {
-    if (!hasFP(MF))
+    if (!HasFP)
       return Offset + StackSize;
+    if (IsWinEH)
+      return Offset + FPDelta;
 
     // Skip the saved EBP.
-    Offset += RegInfo->getSlotSize();
+    Offset += SlotSize;
 
     // Skip the RETADDR move area
-    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
     int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
     if (TailCallReturnAddrDelta < 0)
       Offset -= TailCallReturnAddrDelta;
@@ -1959,8 +2012,8 @@ eliminateCallFramePseudoInstr(MachineFun
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
     // alignment boundary.
-    unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
-    Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+    unsigned StackAlign = getStackAlignment();
+    Amount = RoundUpToAlignment(Amount, StackAlign);
 
     MachineInstr *New = nullptr;
 

Modified: llvm/trunk/test/CodeGen/X86/frameaddr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/frameaddr.ll?rev=228641&r1=228640&r2=228641&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/frameaddr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/frameaddr.ll Mon Feb  9 18:57:42 2015
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86                                | FileCheck %s --check-prefix=CHECK-32
 ; RUN: llc < %s -march=x86    -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32
-; RUN: llc < %s -march=x86-64                             | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-unknown                             | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-unknown -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
 ; RUN: llc < %s -mtriple=x86_64-gnux32                    | FileCheck %s --check-prefix=CHECK-X32ABI
 ; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI
 ; RUN: llc < %s -mtriple=x86_64-nacl                    | FileCheck %s --check-prefix=CHECK-NACL64

Modified: llvm/trunk/test/CodeGen/X86/gcc_except_table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/gcc_except_table.ll?rev=228641&r1=228640&r2=228641&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/gcc_except_table.ll (original)
+++ llvm/trunk/test/CodeGen/X86/gcc_except_table.ll Mon Feb  9 18:57:42 2015
@@ -15,7 +15,7 @@ define i32 @main() uwtable optsize ssp {
 
 ; MINGW64: .seh_proc
 ; MINGW64: .seh_handler __gxx_personality_v0
-; MINGW64: .seh_setframe 5, 0
+; MINGW64: .seh_setframe 5, 32
 ; MINGW64: callq _Unwind_Resume
 ; MINGW64: .seh_handlerdata
 ; MINGW64: GCC_except_table0:

Modified: llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.ll?rev=228641&r1=228640&r2=228641&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.ll (original)
+++ llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.ll Mon Feb  9 18:57:42 2015
@@ -14,26 +14,24 @@ entry:
   %buf0 = alloca i8, i64 4096, align 1
 
 ; ___chkstk_ms does not adjust %rsp.
-; M64: movq  %rsp, %rbp
 ; M64:       $4096, %rax
 ; M64: callq ___chkstk_ms
 ; M64: subq  %rax, %rsp
+; M64: leaq 128(%rsp), %rbp
 
 ; __chkstk does not adjust %rsp.
-; W64: movq  %rsp, %rbp
 ; W64:       $4096, %rax
 ; W64: callq __chkstk
 ; W64: subq  %rax, %rsp
+; W64: leaq 128(%rsp), %rbp
 
 ; Use %r11 for the large model.
-; L64: movq  %rsp, %rbp
 ; L64:       $4096, %rax
 ; L64: movabsq $__chkstk, %r11
 ; L64: callq *%r11
 ; L64: subq  %rax, %rsp
 
 ; Freestanding
-; EFI: movq  %rsp, %rbp
 ; EFI:       $[[B0OFS:4096|4104]], %rsp
 ; EFI-NOT:   call
 
@@ -68,12 +66,12 @@ entry:
 
 ; M64: subq  $48, %rsp
 ; M64: movq  %rax, 32(%rsp)
-; M64: leaq  -4096(%rbp), %r9
+; M64: leaq  -128(%rbp), %r9
 ; M64: callq bar
 
 ; W64: subq  $48, %rsp
 ; W64: movq  %rax, 32(%rsp)
-; W64: leaq  -4096(%rbp), %r9
+; W64: leaq  -128(%rbp), %r9
 ; W64: callq bar
 
 ; EFI: subq  $48, %rsp
@@ -83,9 +81,9 @@ entry:
 
   ret i64 %r
 
-; M64: movq    %rbp, %rsp
+; M64: leaq    3968(%rbp), %rsp
 
-; W64: movq    %rbp, %rsp
+; W64: leaq    3968(%rbp), %rsp
 
 }
 

Added: llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.s?rev=228641&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.s (added)
+++ llvm/trunk/test/CodeGen/X86/win64_alloca_dynalloca.s Mon Feb  9 18:57:42 2015
@@ -0,0 +1,29 @@
+	.text
+	.def	 unaligned;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	unaligned
+	.align	16, 0x90
+unaligned:                              # @unaligned
+# BB#0:                                 # %entry
+	pushq	%rbp
+	movabsq	$4096, %rax             # imm = 0x1000
+	callq	__chkstk
+	subq	%rax, %rsp
+	leaq	128(%rsp), %rbp
+	leaq	15(%rcx), %rax
+	andq	$-16, %rax
+	callq	__chkstk
+	subq	%rax, %rsp
+	movq	%rsp, %rax
+	subq	$48, %rsp
+	movq	%rax, 32(%rsp)
+	leaq	-128(%rbp), %r9
+	movq	%rcx, %r8
+	callq	bar
+	leaq	4016(%rbp), %rsp
+	popq	%rbp
+	retq
+
+

Modified: llvm/trunk/test/CodeGen/X86/win64_eh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64_eh.ll?rev=228641&r1=228640&r2=228641&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64_eh.ll (original)
+++ llvm/trunk/test/CodeGen/X86/win64_eh.ll Mon Feb  9 18:57:42 2015
@@ -146,23 +146,23 @@ entry:
 ; WIN64: .seh_proc foo5
 ; WIN64: pushq %rbp
 ; WIN64: .seh_pushreg 5
-; WIN64: movq  %rsp, %rbp
 ; WIN64: pushq %rdi
 ; WIN64: .seh_pushreg 7
 ; WIN64: pushq %rbx
 ; WIN64: .seh_pushreg 3
-; WIN64: andq  $-64, %rsp
 ; WIN64: subq  $128, %rsp
-; WIN64: .seh_stackalloc 48
-; WIN64: .seh_setframe 5, 64
+; WIN64: .seh_stackalloc 128
+; WIN64: leaq  128(%rsp), %rbp
+; WIN64: .seh_setframe 5, 128
 ; WIN64: movaps  %xmm7, -32(%rbp)        # 16-byte Spill
 ; WIN64: movaps  %xmm6, -48(%rbp)        # 16-byte Spill
-; WIN64: .seh_savexmm 6, 16
-; WIN64: .seh_savexmm 7, 32
+; WIN64: .seh_savexmm 6, 80
+; WIN64: .seh_savexmm 7, 96
 ; WIN64: .seh_endprologue
+; WIN64: andq  $-64, %rsp
 ; WIN64: movaps  -48(%rbp), %xmm6        # 16-byte Reload
 ; WIN64: movaps  -32(%rbp), %xmm7        # 16-byte Reload
-; WIN64: leaq  -16(%rbp), %rsp
+; WIN64: leaq  (%rbp), %rsp
 ; WIN64: popq  %rbx
 ; WIN64: popq  %rdi
 ; WIN64: popq  %rbp

Added: llvm/trunk/test/CodeGen/X86/win64_eh.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64_eh.s?rev=228641&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64_eh.s (added)
+++ llvm/trunk/test/CodeGen/X86/win64_eh.s Mon Feb  9 18:57:42 2015
@@ -0,0 +1,50 @@
+	.text
+	.def	 foo5;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	foo5
+	.align	16, 0x90
+foo5:                                   # @foo5
+.Ltmp0:
+.seh_proc foo5
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp1:
+	.seh_pushreg 5
+	pushq	%rdi
+.Ltmp2:
+	.seh_pushreg 7
+	pushq	%rbx
+.Ltmp3:
+	.seh_pushreg 3
+	subq	$384, %rsp              # imm = 0x180
+.Ltmp4:
+	.seh_stackalloc 384
+	leaq	128(%rsp), %rbp
+.Ltmp5:
+	.seh_setframe 5, 128
+	movaps	%xmm7, -32(%rbp)        # 16-byte Spill
+	movaps	%xmm6, -48(%rbp)        # 16-byte Spill
+.Ltmp6:
+	.seh_savexmm 6, 80
+.Ltmp7:
+	.seh_savexmm 7, 96
+.Ltmp8:
+	.seh_endprologue
+	andq	$-64, %rsp
+	#APP
+	#NO_APP
+	movl	$42, (%rsp)
+	movaps	-48(%rbp), %xmm6        # 16-byte Reload
+	movaps	-32(%rbp), %xmm7        # 16-byte Reload
+	leaq	256(%rbp), %rsp
+	popq	%rbx
+	popq	%rdi
+	popq	%rbp
+	retq
+.Leh_func_end0:
+.Ltmp9:
+	.seh_endproc
+
+

Added: llvm/trunk/test/CodeGen/X86/win64_frame.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64_frame.ll?rev=228641&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64_frame.ll (added)
+++ llvm/trunk/test/CodeGen/X86/win64_frame.ll Mon Feb  9 18:57:42 2015
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f1:
+  ; CHECK:       movl    48(%rbp), %eax
+  ret i32 %p5
+}
+
+define void @f2(i32 %p, ...) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f2:
+  ; CHECK:      .seh_stackalloc 8
+  ; CHECK:      leaq    16(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 16
+  ; CHECK:      movq    %rdx, 16(%rbp)
+  ; CHECK:      leaq    16(%rbp), %rax
+  %ap = alloca i8, align 8
+  call void @llvm.va_start(i8* %ap)
+  ret void
+}
+
+define i8* @f3() "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f3:
+  ; CHECK:      leaq    (%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 0
+  ; CHECK:      movq    8(%rbp), %rax
+  %ra = call i8* @llvm.returnaddress(i32 0)
+  ret i8* %ra
+}
+
+define i8* @f4() "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f4:
+  ; CHECK:      pushq   %rbp
+  ; CHECK:      .seh_pushreg 5
+  ; CHECK:      subq    $304, %rsp
+  ; CHECK:      .seh_stackalloc 304
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      .seh_endprologue
+  ; CHECK:      movq    184(%rbp), %rax
+  alloca [300 x i8]
+  %ra = call i8* @llvm.returnaddress(i32 0)
+  ret i8* %ra
+}
+
+declare void @external(i8*)
+
+define void @f5() "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f5:
+  ; CHECK:      subq    $336, %rsp
+  ; CHECK:      .seh_stackalloc 336
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      leaq    -92(%rbp), %rcx
+  ; CHECK:      callq   external
+  %a = alloca [300 x i8]
+  %gep = getelementptr [300 x i8]* %a, i32 0, i32 0
+  call void @external(i8* %gep)
+  ret void
+}
+
+define void @f6(i32 %p, ...) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f6:
+  ; CHECK:      subq    $336, %rsp
+  ; CHECK:      .seh_stackalloc 336
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      leaq    -92(%rbp), %rcx
+  ; CHECK:      callq   external
+  %a = alloca [300 x i8]
+  %gep = getelementptr [300 x i8]* %a, i32 0, i32 0
+  call void @external(i8* %gep)
+  ret void
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f7:
+  ; CHECK:      pushq   %rbp
+  ; CHECK:      .seh_pushreg 5
+  ; CHECK:      subq    $320, %rsp
+  ; CHECK:      .seh_stackalloc 320
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      movl    240(%rbp), %eax
+  ; CHECK:      leaq    192(%rbp), %rsp
+  alloca [300 x i8], align 64
+  ret i32 %e
+}
+
+define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f8:
+  ; CHECK:        subq    $384, %rsp
+  ; CHECK:        .seh_stackalloc 384
+  ; CHECK:        leaq    128(%rsp), %rbp
+  ; CHECK:        .seh_setframe 5, 128
+
+  %alloca = alloca [300 x i8], align 64
+  ; CHECK:        andq    $-64, %rsp
+  ; CHECK:        movq    %rsp, %rbx
+
+  alloca i32, i32 %a
+  ; CHECK:        movl    %ecx, %eax
+  ; CHECK:        leaq    15(,%rax,4), %rax
+  ; CHECK:        andq    $-16, %rax
+  ; CHECK:        callq   __chkstk
+  ; CHECK:        subq    %rax, %rsp
+
+  %gep = getelementptr [300 x i8]* %alloca, i32 0, i32 0
+  call void @external(i8* %gep)
+  ; CHECK:        subq    $32, %rsp
+  ; CHECK:        leaq    (%rbx), %rcx
+  ; CHECK:        callq   external
+  ; CHECK:        addq    $32, %rsp
+
+  ret i32 %e
+  ; CHECK:        movl    %esi, %eax
+  ; CHECK:        leaq    256(%rbp), %rsp
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+declare void @llvm.va_start(i8*) nounwind





More information about the llvm-commits mailing list