[llvm-commits] [llvm] r160248 - in /llvm/trunk: lib/Target/X86/X86FrameLowering.cpp test/CodeGen/X86/dynamic-allocas-VLAs.ll test/CodeGen/X86/force-align-stack-alloca.ll test/CodeGen/X86/pr11468.ll
Alexey Samsonov
samsonov at google.com
Mon Jul 16 04:59:21 PDT 2012
I'll get to this in about an hour.
On Mon, Jul 16, 2012 at 3:23 PM, NAKAMURA Takumi <geek4civic at gmail.com>wrote:
> Alexey,
>
> It broke tests in Win32 (includes cygming).
>
> Failing Tests (2):
> LLVM :: CodeGen/X86/epilogue.ll
> LLVM :: CodeGen/X86/widen_arith-3.ll
>
> Please reconfirm them. You can easily reproduce the failures locally by
> adding -mtriple=i686-win32 to those tests.
>
> ...Takumi
>
> 2012/7/16 Alexey Samsonov <samsonov at google.com>:
> > Author: samsonov
> > Date: Mon Jul 16 01:54:09 2012
> > New Revision: 160248
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=160248&view=rev
> > Log:
> > This CL changes the function prologue and epilogue emitted on X86 when
> stack needs realignment.
> > It is intended to fix PR11468.
> >
> > Old prologue and epilogue looked like this:
> > push %rbp
> > mov %rsp, %rbp
> > and $alignment, %rsp
> > push %r14
> > push %r15
> > ...
> > pop %r15
> > pop %r14
> > mov %rbp, %rsp
> > pop %rbp
> >
> > The problem was to reference the locations of callee-saved registers in
> exception handling:
> > locations of callee-saved had to be re-calculated regarding the stack
> alignment operation. It would
> > take some effort to implement this in LLVM, as currently MachineLocation
> can only have the form
> > "Register + Offset". Function prologue and epilogue are now changed to:
> >
> > push %rbp
> > mov %rsp, %rbp
> > push %r14
> > push %r15
> > and $alignment, %rsp
> > ...
> > lea -$size_of_saved_registers(%rbp), %rsp
> > pop %r15
> > pop %r14
> > pop %rbp
> >
> > Reviewed by Chad Rosier.
> >
> > Added:
> > llvm/trunk/test/CodeGen/X86/pr11468.ll
> > Modified:
> > llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
> > llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
> > llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
> >
> > Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=160248&r1=160247&r2=160248&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Jul 16 01:54:09
> 2012
> > @@ -722,10 +722,14 @@
> > if (HasFP) {
> > // Calculate required stack adjustment.
> > uint64_t FrameSize = StackSize - SlotSize;
> > - if (RegInfo->needsStackRealignment(MF))
> > - FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
> > -
> > - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
> > + if (RegInfo->needsStackRealignment(MF)) {
> > + // Callee-saved registers are pushed on stack before the stack
> > + // is realigned.
> > + FrameSize -= X86FI->getCalleeSavedFrameSize();
> > + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
> > + } else {
> > + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
> > + }
> >
> > // Get the offset of the stack slot for the EBP register, which is
> > // guaranteed to be the last slot by
> processFunctionBeforeFrameFinalized.
> > @@ -782,19 +786,6 @@
> > for (MachineFunction::iterator I = llvm::next(MF.begin()), E =
> MF.end();
> > I != E; ++I)
> > I->addLiveIn(FramePtr);
> > -
> > - // Realign stack
> > - if (RegInfo->needsStackRealignment(MF)) {
> > - MachineInstr *MI =
> > - BuildMI(MBB, MBBI, DL,
> > - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
> StackPtr)
> > - .addReg(StackPtr)
> > - .addImm(-MaxAlign)
> > - .setMIFlag(MachineInstr::FrameSetup);
> > -
> > - // The EFLAGS implicit def is dead.
> > - MI->getOperand(3).setIsDead();
> > - }
> > } else {
> > NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
> > }
> > @@ -824,6 +815,27 @@
> > }
> > }
> >
> > + // Realign stack after we pushed callee-saved registers (so that
> we'll be
> > + // able to calculate their offsets from the frame pointer).
> > +
> > + // NOTE: We push the registers before realigning the stack, so
> > + // vector callee-saved (xmm) registers may be saved w/o proper
> > + // alignment in this way. However, currently these regs are saved in
> > + // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
> > + // this shouldn't be a problem.
> > + if (RegInfo->needsStackRealignment(MF)) {
> > + assert(HasFP && "There should be a frame pointer if stack is
> realigned.");
> > + MachineInstr *MI =
> > + BuildMI(MBB, MBBI, DL,
> > + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
> StackPtr)
> > + .addReg(StackPtr)
> > + .addImm(-MaxAlign)
> > + .setMIFlag(MachineInstr::FrameSetup);
> > +
> > + // The EFLAGS implicit def is dead.
> > + MI->getOperand(3).setIsDead();
> > + }
> > +
> > DL = MBB.findDebugLoc(MBBI);
> >
> > // If there is an SUB32ri of ESP immediately before this instruction,
> merge
> > @@ -975,7 +987,6 @@
> > unsigned SlotSize = RegInfo->getSlotSize();
> > unsigned FramePtr = RegInfo->getFrameRegister(MF);
> > unsigned StackPtr = RegInfo->getStackRegister();
> > - unsigned BasePtr = RegInfo->getBaseRegister();
> >
> > switch (RetOpcode) {
> > default:
> > @@ -1013,10 +1024,14 @@
> > if (hasFP(MF)) {
> > // Calculate required stack adjustment.
> > uint64_t FrameSize = StackSize - SlotSize;
> > - if (RegInfo->needsStackRealignment(MF))
> > - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
> > -
> > - NumBytes = FrameSize - CSSize;
> > + if (RegInfo->needsStackRealignment(MF)) {
> > + // Callee-saved registers were pushed on stack before the stack
> > + // was realigned.
> > + FrameSize -= CSSize;
> > + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
> > + } else {
> > + NumBytes = FrameSize - CSSize;
> > + }
> >
> > // Pop EBP.
> > BuildMI(MBB, MBBI, DL,
> > @@ -1026,7 +1041,6 @@
> > }
> >
> > // Skip the callee-saved pop instructions.
> > - MachineBasicBlock::iterator LastCSPop = MBBI;
> > while (MBBI != MBB.begin()) {
> > MachineBasicBlock::iterator PI = prior(MBBI);
> > unsigned Opc = PI->getOpcode();
> > @@ -1037,6 +1051,7 @@
> >
> > --MBBI;
> > }
> > + MachineBasicBlock::iterator FirstCSPop = MBBI;
> >
> > DL = MBBI->getDebugLoc();
> >
> > @@ -1045,40 +1060,19 @@
> > if (NumBytes || MFI->hasVarSizedObjects())
> > mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
> >
> > - // Restore the SP from the BP, if necessary.
> > - if (RegInfo->hasBasePointer(MF)) {
> > - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr :
> X86::MOV32rr),
> > - StackPtr).addReg(BasePtr);
> > -
> > - // When restoring from the BP we must use a cached SP adjustment.
> > - NumBytes = X86FI->getBasePtrStackAdjustment();
> > - }
> > -
> > // If dynamic alloca is used, then reset esp to point to the last
> callee-saved
> > // slot before popping them off! Same applies for the case, when
> stack was
> > // realigned.
> > - if (RegInfo->needsStackRealignment(MF)) {
> > - // We cannot use LEA here, because stack pointer was realigned. We
> need to
> > - // deallocate local frame back.
> > - if (CSSize) {
> > - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
> > - *RegInfo);
> > - MBBI = prior(LastCSPop);
> > - }
> > -
> > - BuildMI(MBB, MBBI, DL,
> > - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
> > - StackPtr).addReg(FramePtr);
> > - } else if (MFI->hasVarSizedObjects()) {
> > - if (CSSize) {
> > - unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
> > - MachineInstr *MI =
> > - addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
> > - FramePtr, false, -CSSize);
> > - MBB.insert(MBBI, MI);
> > + if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
> > + if (RegInfo->needsStackRealignment(MF))
> > + MBBI = FirstCSPop;
> > + if (CSSize != 0) {
> > + unsigned Opc = getLEArOpcode(Is64Bit);
> > + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
> > + FramePtr, false, -CSSize);
> > } else {
> > - BuildMI(MBB, MBBI, DL,
> > - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
> > + unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
> > + BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
> > .addReg(FramePtr);
> > }
> > } else if (NumBytes) {
> >
> > Modified: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=160248&r1=160247&r2=160248&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (original)
> > +++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll Mon Jul 16
> 01:54:09 2012
> > @@ -85,20 +85,19 @@
> > ; CHECK: _t4
> > ; CHECK: pushq %rbp
> > ; CHECK: movq %rsp, %rbp
> > -; CHECK: andq $-32, %rsp
> > ; CHECK: pushq %r14
> > ; CHECK: pushq %rbx
> > -; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
> > +; CHECK: andq $-32, %rsp
> > +; CHECK: subq ${{[0-9]+}}, %rsp
> > ; CHECK: movq %rsp, %rbx
> > ;
> > ; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
> > ; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
> > ; CHECK: callq _t4_helper
> > ;
> > -; CHECK: addq $[[STACKADJ]], %rsp
> > +; CHECK: leaq -16(%rbp), %rsp
> > ; CHECK: popq %rbx
> > ; CHECK: popq %r14
> > -; CHECK: movq %rbp, %rsp
> > ; CHECK: popq %rbp
> > }
> >
> > @@ -176,19 +175,17 @@
> > ; CHECK: _t7
> > ; CHECK: pushq %rbp
> > ; CHECK: movq %rsp, %rbp
> > -; CHECK: andq $-32, %rsp
> > ; CHECK: pushq %rbx
> > -; CHECK: subq $[[ADJ:[0-9]+]], %rsp
> > +; CHECK: andq $-32, %rsp
> > +; CHECK: subq ${{[0-9]+}}, %rsp
> > ; CHECK: movq %rsp, %rbx
> >
> > ; Stack adjustment for byval
> > ; CHECK: subq {{.*}}, %rsp
> > ; CHECK: callq _bar
> > ; CHECK-NOT: addq {{.*}}, %rsp
> > -; CHECK: movq %rbx, %rsp
> > -; CHECK: addq $[[ADJ]], %rsp
> > +; CHECK: leaq -8(%rbp), %rsp
> > ; CHECK: popq %rbx
> > -; CHECK: movq %rbp, %rsp
> > ; CHECK: popq %rbp
> > }
> >
> > @@ -229,14 +226,12 @@
> > ; FORCE-ALIGN: _t9
> > ; FORCE-ALIGN: pushq %rbp
> > ; FORCE-ALIGN: movq %rsp, %rbp
> > -; FORCE-ALIGN: andq $-32, %rsp
> > ; FORCE-ALIGN: pushq %rbx
> > -; FORCE-ALIGN: subq $24, %rsp
> > +; FORCE-ALIGN: andq $-32, %rsp
> > +; FORCE-ALIGN: subq $32, %rsp
> > ; FORCE-ALIGN: movq %rsp, %rbx
> >
> > -; FORCE-ALIGN: movq %rbx, %rsp
> > -; FORCE-ALIGN: addq $24, %rsp
> > +; FORCE-ALIGN: leaq -8(%rbp), %rsp
> > ; FORCE-ALIGN: popq %rbx
> > -; FORCE-ALIGN: movq %rbp, %rsp
> > ; FORCE-ALIGN: popq %rbp
> > }
> >
> > Modified: llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll?rev=160248&r1=160247&r2=160248&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll (original)
> > +++ llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll Mon Jul 16
> 01:54:09 2012
> > @@ -19,10 +19,10 @@
> > ; CHECK: g:
> > ; CHECK: pushl %ebp
> > ; CHECK-NEXT: movl %esp, %ebp
> > -; CHECK-NEXT: andl $-32, %esp
> > ; CHECK-NEXT: pushl
> > ; CHECK-NEXT: pushl
> > -; CHECK-NEXT: subl $24, %esp
> > +; CHECK-NEXT: andl $-32, %esp
> > +; CHECK-NEXT: subl $32, %esp
> > ;
> > ; Now setup the base pointer (%ebx).
> > ; CHECK-NEXT: movl %esp, %ebx
> > @@ -46,17 +46,13 @@
> > ; CHECK-NEXT: addl $32, %esp
> > ; CHECK-NOT: {{[^ ,]*}}, %esp
> > ;
> > -; Restore %esp from %ebx (base pointer) so we can pop the callee-saved
> > -; registers. This is the state prior to the allocation of VLAs.
> > +; Restore %esp from %ebp (frame pointer) and subtract the size of
> > +; zone with callee-saved registers to pop them.
> > +; This is the state prior to stack realignment and the allocation of
> VLAs.
> > ; CHECK-NOT: popl
> > -; CHECK: movl %ebx, %esp
> > -; CHECK-NEXT: addl $24, %esp
> > +; CHECK: leal -8(%ebp), %esp
> > ; CHECK-NEXT: popl
> > ; CHECK-NEXT: popl
> > -;
> > -; Finally we need to restore %esp from %ebp due to dynamic stack
> > -; realignment.
> > -; CHECK-NEXT: movl %ebp, %esp
> > ; CHECK-NEXT: popl %ebp
> > ; CHECK-NEXT: ret
> >
> >
> > Added: llvm/trunk/test/CodeGen/X86/pr11468.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11468.ll?rev=160248&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/X86/pr11468.ll (added)
> > +++ llvm/trunk/test/CodeGen/X86/pr11468.ll Mon Jul 16 01:54:09 2012
> > @@ -0,0 +1,33 @@
> > +; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64
> -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
> > +; PR11468
> > +
> > +define void @f(i64 %sz) uwtable {
> > +entry:
> > + %a = alloca i32, align 32
> > + store volatile i32 0, i32* %a, align 32
> > + ; force to push r14 on stack
> > + call void asm sideeffect "nop",
> "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
> > + ret void
> > +
> > +; CHECK: _f
> > +; CHECK: pushq %rbp
> > +; CHECK: .cfi_offset %rbp, -16
> > +; CHECK: movq %rsp, %rbp
> > +; CHECK: .cfi_def_cfa_register %rbp
> > +
> > +; We first push register on stack, and then realign it, so that
> > +; .cfi_offset value is correct
> > +; CHECK: pushq %r14
> > +; CHECK: andq $-32, %rsp
> > +; CHECK: .cfi_offset %r14, -24
> > +
> > +; Restore %rsp from %rbp and subtract the total size of saved regsiters.
> > +; CHECK: leaq -8(%rbp), %rsp
> > +
> > +; Pop saved registers.
> > +; CHECK: popq %r14
> > +; CHECK: popq %rbp
> > +}
> > +
> > +!0 = metadata !{i32 125}
> > +
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Alexey Samsonov, MSK
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20120716/869f7bbc/attachment.html>
More information about the llvm-commits
mailing list