[llvm-commits] [llvm] r160248 - in /llvm/trunk: lib/Target/X86/X86FrameLowering.cpp test/CodeGen/X86/dynamic-allocas-VLAs.ll test/CodeGen/X86/force-align-stack-alloca.ll test/CodeGen/X86/pr11468.ll

NAKAMURA Takumi geek4civic at gmail.com
Mon Jul 16 04:23:46 PDT 2012


Alexey,

It broke tests on Win32 (including cygming).

Failing Tests (2):
    LLVM :: CodeGen/X86/epilogue.ll
    LLVM :: CodeGen/X86/widen_arith-3.ll

Please reconfirm them. You can easily reproduce the failures by adding
-mtriple=i686-win32 locally to those tests.

...Takumi

2012/7/16 Alexey Samsonov <samsonov at google.com>:
> Author: samsonov
> Date: Mon Jul 16 01:54:09 2012
> New Revision: 160248
>
> URL: http://llvm.org/viewvc/llvm-project?rev=160248&view=rev
> Log:
> This CL changes the function prologue and epilogue emitted on X86 when stack needs realignment.
> It is intended to fix PR11468.
>
> Old prologue and epilogue looked like this:
> push %rbp
> mov %rsp, %rbp
> and $alignment, %rsp
> push %r14
> push %r15
> ...
> pop %r15
> pop %r14
> mov %rbp, %rsp
> pop %rbp
>
> The problem was referencing the locations of callee-saved registers in exception handling:
> the locations of callee-saved registers had to be re-calculated to account for the stack alignment operation. It would
> take some effort to implement this in LLVM, as currently MachineLocation can only have the form
> "Register + Offset". Function prologue and epilogue are now changed to:
>
> push %rbp
> mov %rsp, %rbp
> push %r14
> push %r15
> and $alignment, %rsp
> ...
> lea -$size_of_saved_registers(%rbp), %rsp
> pop %r15
> pop %r14
> pop %rbp
>
> Reviewed by Chad Rosier.
>
> Added:
>     llvm/trunk/test/CodeGen/X86/pr11468.ll
> Modified:
>     llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
>     llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
>     llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=160248&r1=160247&r2=160248&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Jul 16 01:54:09 2012
> @@ -722,10 +722,14 @@
>    if (HasFP) {
>      // Calculate required stack adjustment.
>      uint64_t FrameSize = StackSize - SlotSize;
> -    if (RegInfo->needsStackRealignment(MF))
> -      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
> -
> -    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
> +    if (RegInfo->needsStackRealignment(MF)) {
> +      // Callee-saved registers are pushed on stack before the stack
> +      // is realigned.
> +      FrameSize -= X86FI->getCalleeSavedFrameSize();
> +      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
> +    } else {
> +      NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
> +    }
>
>      // Get the offset of the stack slot for the EBP register, which is
>      // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
> @@ -782,19 +786,6 @@
>      for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
>           I != E; ++I)
>        I->addLiveIn(FramePtr);
> -
> -    // Realign stack
> -    if (RegInfo->needsStackRealignment(MF)) {
> -      MachineInstr *MI =
> -        BuildMI(MBB, MBBI, DL,
> -                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
> -        .addReg(StackPtr)
> -        .addImm(-MaxAlign)
> -        .setMIFlag(MachineInstr::FrameSetup);
> -
> -      // The EFLAGS implicit def is dead.
> -      MI->getOperand(3).setIsDead();
> -    }
>    } else {
>      NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
>    }
> @@ -824,6 +815,27 @@
>      }
>    }
>
> +  // Realign stack after we pushed callee-saved registers (so that we'll be
> +  // able to calculate their offsets from the frame pointer).
> +
> +  // NOTE: We push the registers before realigning the stack, so
> +  // vector callee-saved (xmm) registers may be saved w/o proper
> +  // alignment in this way. However, currently these regs are saved in
> +  // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
> +  // this shouldn't be a problem.
> +  if (RegInfo->needsStackRealignment(MF)) {
> +    assert(HasFP && "There should be a frame pointer if stack is realigned.");
> +    MachineInstr *MI =
> +      BuildMI(MBB, MBBI, DL,
> +              TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
> +      .addReg(StackPtr)
> +      .addImm(-MaxAlign)
> +      .setMIFlag(MachineInstr::FrameSetup);
> +
> +    // The EFLAGS implicit def is dead.
> +    MI->getOperand(3).setIsDead();
> +  }
> +
>    DL = MBB.findDebugLoc(MBBI);
>
>    // If there is an SUB32ri of ESP immediately before this instruction, merge
> @@ -975,7 +987,6 @@
>    unsigned SlotSize = RegInfo->getSlotSize();
>    unsigned FramePtr = RegInfo->getFrameRegister(MF);
>    unsigned StackPtr = RegInfo->getStackRegister();
> -  unsigned BasePtr = RegInfo->getBaseRegister();
>
>    switch (RetOpcode) {
>    default:
> @@ -1013,10 +1024,14 @@
>    if (hasFP(MF)) {
>      // Calculate required stack adjustment.
>      uint64_t FrameSize = StackSize - SlotSize;
> -    if (RegInfo->needsStackRealignment(MF))
> -      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
> -
> -    NumBytes = FrameSize - CSSize;
> +    if (RegInfo->needsStackRealignment(MF)) {
> +      // Callee-saved registers were pushed on stack before the stack
> +      // was realigned.
> +      FrameSize -= CSSize;
> +      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
> +    } else {
> +      NumBytes = FrameSize - CSSize;
> +    }
>
>      // Pop EBP.
>      BuildMI(MBB, MBBI, DL,
> @@ -1026,7 +1041,6 @@
>    }
>
>    // Skip the callee-saved pop instructions.
> -  MachineBasicBlock::iterator LastCSPop = MBBI;
>    while (MBBI != MBB.begin()) {
>      MachineBasicBlock::iterator PI = prior(MBBI);
>      unsigned Opc = PI->getOpcode();
> @@ -1037,6 +1051,7 @@
>
>      --MBBI;
>    }
> +  MachineBasicBlock::iterator FirstCSPop = MBBI;
>
>    DL = MBBI->getDebugLoc();
>
> @@ -1045,40 +1060,19 @@
>    if (NumBytes || MFI->hasVarSizedObjects())
>      mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
>
> -  // Restore the SP from the BP, if necessary.
> -  if (RegInfo->hasBasePointer(MF)) {
> -    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
> -            StackPtr).addReg(BasePtr);
> -
> -    // When restoring from the BP we must use a cached SP adjustment.
> -    NumBytes = X86FI->getBasePtrStackAdjustment();
> -  }
> -
>    // If dynamic alloca is used, then reset esp to point to the last callee-saved
>    // slot before popping them off! Same applies for the case, when stack was
>    // realigned.
> -  if (RegInfo->needsStackRealignment(MF)) {
> -    // We cannot use LEA here, because stack pointer was realigned. We need to
> -    // deallocate local frame back.
> -    if (CSSize) {
> -      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
> -                   *RegInfo);
> -      MBBI = prior(LastCSPop);
> -    }
> -
> -    BuildMI(MBB, MBBI, DL,
> -            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
> -            StackPtr).addReg(FramePtr);
> -  } else if (MFI->hasVarSizedObjects()) {
> -    if (CSSize) {
> -      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
> -      MachineInstr *MI =
> -        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
> -                     FramePtr, false, -CSSize);
> -      MBB.insert(MBBI, MI);
> +  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
> +    if (RegInfo->needsStackRealignment(MF))
> +      MBBI = FirstCSPop;
> +    if (CSSize != 0) {
> +      unsigned Opc = getLEArOpcode(Is64Bit);
> +      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
> +                   FramePtr, false, -CSSize);
>      } else {
> -      BuildMI(MBB, MBBI, DL,
> -              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
> +      unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
> +      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
>          .addReg(FramePtr);
>      }
>    } else if (NumBytes) {
>
> Modified: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=160248&r1=160247&r2=160248&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll Mon Jul 16 01:54:09 2012
> @@ -85,20 +85,19 @@
>  ; CHECK: _t4
>  ; CHECK: pushq %rbp
>  ; CHECK: movq %rsp, %rbp
> -; CHECK: andq $-32, %rsp
>  ; CHECK: pushq %r14
>  ; CHECK: pushq %rbx
> -; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
> +; CHECK: andq $-32, %rsp
> +; CHECK: subq ${{[0-9]+}}, %rsp
>  ; CHECK: movq %rsp, %rbx
>  ;
>  ; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
>  ; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
>  ; CHECK: callq   _t4_helper
>  ;
> -; CHECK: addq $[[STACKADJ]], %rsp
> +; CHECK: leaq -16(%rbp), %rsp
>  ; CHECK: popq %rbx
>  ; CHECK: popq %r14
> -; CHECK: movq %rbp, %rsp
>  ; CHECK: popq %rbp
>  }
>
> @@ -176,19 +175,17 @@
>  ; CHECK: _t7
>  ; CHECK:     pushq %rbp
>  ; CHECK:     movq %rsp, %rbp
> -; CHECK:     andq $-32, %rsp
>  ; CHECK:     pushq %rbx
> -; CHECK:     subq $[[ADJ:[0-9]+]], %rsp
> +; CHECK:     andq $-32, %rsp
> +; CHECK:     subq ${{[0-9]+}}, %rsp
>  ; CHECK:     movq %rsp, %rbx
>
>  ; Stack adjustment for byval
>  ; CHECK:     subq {{.*}}, %rsp
>  ; CHECK:     callq _bar
>  ; CHECK-NOT: addq {{.*}}, %rsp
> -; CHECK:     movq %rbx, %rsp
> -; CHECK:     addq $[[ADJ]], %rsp
> +; CHECK:     leaq -8(%rbp), %rsp
>  ; CHECK:     popq %rbx
> -; CHECK:     movq %rbp, %rsp
>  ; CHECK:     popq %rbp
>  }
>
> @@ -229,14 +226,12 @@
>  ; FORCE-ALIGN: _t9
>  ; FORCE-ALIGN: pushq %rbp
>  ; FORCE-ALIGN: movq %rsp, %rbp
> -; FORCE-ALIGN: andq $-32, %rsp
>  ; FORCE-ALIGN: pushq %rbx
> -; FORCE-ALIGN: subq $24, %rsp
> +; FORCE-ALIGN: andq $-32, %rsp
> +; FORCE-ALIGN: subq $32, %rsp
>  ; FORCE-ALIGN: movq %rsp, %rbx
>
> -; FORCE-ALIGN: movq %rbx, %rsp
> -; FORCE-ALIGN: addq $24, %rsp
> +; FORCE-ALIGN: leaq -8(%rbp), %rsp
>  ; FORCE-ALIGN: popq %rbx
> -; FORCE-ALIGN: movq %rbp, %rsp
>  ; FORCE-ALIGN: popq %rbp
>  }
>
> Modified: llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll?rev=160248&r1=160247&r2=160248&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll Mon Jul 16 01:54:09 2012
> @@ -19,10 +19,10 @@
>  ; CHECK: g:
>  ; CHECK:      pushl  %ebp
>  ; CHECK-NEXT: movl   %esp, %ebp
> -; CHECK-NEXT: andl   $-32, %esp
>  ; CHECK-NEXT: pushl
>  ; CHECK-NEXT: pushl
> -; CHECK-NEXT: subl   $24, %esp
> +; CHECK-NEXT: andl   $-32, %esp
> +; CHECK-NEXT: subl   $32, %esp
>  ;
>  ; Now setup the base pointer (%ebx).
>  ; CHECK-NEXT: movl   %esp, %ebx
> @@ -46,17 +46,13 @@
>  ; CHECK-NEXT: addl   $32, %esp
>  ; CHECK-NOT:         {{[^ ,]*}}, %esp
>  ;
> -; Restore %esp from %ebx (base pointer) so we can pop the callee-saved
> -; registers.  This is the state prior to the allocation of VLAs.
> +; Restore %esp from %ebp (frame pointer) and subtract the size of
> +; zone with callee-saved registers to pop them.
> +; This is the state prior to stack realignment and the allocation of VLAs.
>  ; CHECK-NOT:  popl
> -; CHECK:      movl   %ebx, %esp
> -; CHECK-NEXT: addl   $24, %esp
> +; CHECK:      leal   -8(%ebp), %esp
>  ; CHECK-NEXT: popl
>  ; CHECK-NEXT: popl
> -;
> -; Finally we need to restore %esp from %ebp due to dynamic stack
> -; realignment.
> -; CHECK-NEXT: movl   %ebp, %esp
>  ; CHECK-NEXT: popl   %ebp
>  ; CHECK-NEXT: ret
>
>
> Added: llvm/trunk/test/CodeGen/X86/pr11468.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11468.ll?rev=160248&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pr11468.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/pr11468.ll Mon Jul 16 01:54:09 2012
> @@ -0,0 +1,33 @@
> +; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
> +; PR11468
> +
> +define void @f(i64 %sz) uwtable {
> +entry:
> +  %a = alloca i32, align 32
> +  store volatile i32 0, i32* %a, align 32
> +  ; force to push r14 on stack
> +  call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
> +  ret void
> +
> +; CHECK: _f
> +; CHECK: pushq %rbp
> +; CHECK: .cfi_offset %rbp, -16
> +; CHECK: movq %rsp, %rbp
> +; CHECK: .cfi_def_cfa_register %rbp
> +
> +; We first push register on stack, and then realign it, so that
> +; .cfi_offset value is correct
> +; CHECK: pushq %r14
> +; CHECK: andq $-32, %rsp
> +; CHECK: .cfi_offset %r14, -24
> +
> +; Restore %rsp from %rbp and subtract the total size of saved regsiters.
> +; CHECK: leaq -8(%rbp), %rsp
> +
> +; Pop saved registers.
> +; CHECK: popq %r14
> +; CHECK: popq %rbp
> +}
> +
> +!0 = metadata !{i32 125}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list