I'll get to this in about an hour.<br><br><div class="gmail_quote">On Mon, Jul 16, 2012 at 3:23 PM, NAKAMURA Takumi <span dir="ltr"><<a href="mailto:geek4civic@gmail.com" target="_blank">geek4civic@gmail.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Alexey,<br>
<br>
It broke tests in Win32 (includes cygming).<br>
<br>
Failing Tests (2):<br>
LLVM :: CodeGen/X86/epilogue.ll<br>
LLVM :: CodeGen/X86/widen_arith-3.ll<br>
<br>
Please reconfirm them. You can easily reproduce failures to add<br>
-mtriple=i686-win32 locally on those tests.<br>
<br>
...Takumi<br>
<br>
2012/7/16 Alexey Samsonov <<a href="mailto:samsonov@google.com">samsonov@google.com</a>>:<br>
<div class="HOEnZb"><div class="h5">> Author: samsonov<br>
> Date: Mon Jul 16 01:54:09 2012<br>
> New Revision: 160248<br>
><br>
> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=160248&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=160248&view=rev</a><br>
> Log:<br>
> This CL changes the function prologue and epilogue emitted on X86 when stack needs realignment.<br>
> It is intended to fix PR11468.<br>
><br>
> Old prologue and epilogue looked like this:<br>
> push %rbp<br>
> mov %rsp, %rbp<br>
> and $alignment, %rsp<br>
> push %r14<br>
> push %r15<br>
> ...<br>
> pop %r15<br>
> pop %r14<br>
> mov %rbp, %rsp<br>
> pop %rbp<br>
><br>
> The problem was to reference the locations of callee-saved registers in exception handling:<br>
> locations of callee-saved had to be re-calculated regarding the stack alignment operation. It would<br>
> take some effort to implement this in LLVM, as currently MachineLocation can only have the form<br>
> "Register + Offset". Funciton prologue and epilogue are now changed to:<br>
><br>
> push %rbp<br>
> mov %rsp, %rbp<br>
> push %14<br>
> push %15<br>
> and $alignment, %rsp<br>
> ...<br>
> lea -$size_of_saved_registers(%rbp), %rsp<br>
> pop %r15<br>
> pop %r14<br>
> pop %rbp<br>
><br>
> Reviewed by Chad Rosier.<br>
><br>
> Added:<br>
> llvm/trunk/test/CodeGen/X86/pr11468.ll<br>
> Modified:<br>
> llvm/trunk/lib/Target/X86/X86FrameLowering.cpp<br>
> llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll<br>
> llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=160248&r1=160247&r2=160248&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=160248&r1=160247&r2=160248&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Jul 16 01:54:09 2012<br>
> @@ -722,10 +722,14 @@<br>
> if (HasFP) {<br>
> // Calculate required stack adjustment.<br>
> uint64_t FrameSize = StackSize - SlotSize;<br>
> - if (RegInfo->needsStackRealignment(MF))<br>
> - FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;<br>
> -<br>
> - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();<br>
> + if (RegInfo->needsStackRealignment(MF)) {<br>
> + // Callee-saved registers are pushed on stack before the stack<br>
> + // is realigned.<br>
> + FrameSize -= X86FI->getCalleeSavedFrameSize();<br>
> + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;<br>
> + } else {<br>
> + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();<br>
> + }<br>
><br>
> // Get the offset of the stack slot for the EBP register, which is<br>
> // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.<br>
> @@ -782,19 +786,6 @@<br>
> for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();<br>
> I != E; ++I)<br>
> I->addLiveIn(FramePtr);<br>
> -<br>
> - // Realign stack<br>
> - if (RegInfo->needsStackRealignment(MF)) {<br>
> - MachineInstr *MI =<br>
> - BuildMI(MBB, MBBI, DL,<br>
> - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)<br>
> - .addReg(StackPtr)<br>
> - .addImm(-MaxAlign)<br>
> - .setMIFlag(MachineInstr::FrameSetup);<br>
> -<br>
> - // The EFLAGS implicit def is dead.<br>
> - MI->getOperand(3).setIsDead();<br>
> - }<br>
> } else {<br>
> NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();<br>
> }<br>
> @@ -824,6 +815,27 @@<br>
> }<br>
> }<br>
><br>
> + // Realign stack after we pushed callee-saved registers (so that we'll be<br>
> + // able to calculate their offsets from the frame pointer).<br>
> +<br>
> + // NOTE: We push the registers before realigning the stack, so<br>
> + // vector callee-saved (xmm) registers may be saved w/o proper<br>
> + // alignment in this way. However, currently these regs are saved in<br>
> + // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so<br>
> + // this shouldn't be a problem.<br>
> + if (RegInfo->needsStackRealignment(MF)) {<br>
> + assert(HasFP && "There should be a frame pointer if stack is realigned.");<br>
> + MachineInstr *MI =<br>
> + BuildMI(MBB, MBBI, DL,<br>
> + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)<br>
> + .addReg(StackPtr)<br>
> + .addImm(-MaxAlign)<br>
> + .setMIFlag(MachineInstr::FrameSetup);<br>
> +<br>
> + // The EFLAGS implicit def is dead.<br>
> + MI->getOperand(3).setIsDead();<br>
> + }<br>
> +<br>
> DL = MBB.findDebugLoc(MBBI);<br>
><br>
> // If there is an SUB32ri of ESP immediately before this instruction, merge<br>
> @@ -975,7 +987,6 @@<br>
> unsigned SlotSize = RegInfo->getSlotSize();<br>
> unsigned FramePtr = RegInfo->getFrameRegister(MF);<br>
> unsigned StackPtr = RegInfo->getStackRegister();<br>
> - unsigned BasePtr = RegInfo->getBaseRegister();<br>
><br>
> switch (RetOpcode) {<br>
> default:<br>
> @@ -1013,10 +1024,14 @@<br>
> if (hasFP(MF)) {<br>
> // Calculate required stack adjustment.<br>
> uint64_t FrameSize = StackSize - SlotSize;<br>
> - if (RegInfo->needsStackRealignment(MF))<br>
> - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;<br>
> -<br>
> - NumBytes = FrameSize - CSSize;<br>
> + if (RegInfo->needsStackRealignment(MF)) {<br>
> + // Callee-saved registers were pushed on stack before the stack<br>
> + // was realigned.<br>
> + FrameSize -= CSSize;<br>
> + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;<br>
> + } else {<br>
> + NumBytes = FrameSize - CSSize;<br>
> + }<br>
><br>
> // Pop EBP.<br>
> BuildMI(MBB, MBBI, DL,<br>
> @@ -1026,7 +1041,6 @@<br>
> }<br>
><br>
> // Skip the callee-saved pop instructions.<br>
> - MachineBasicBlock::iterator LastCSPop = MBBI;<br>
> while (MBBI != MBB.begin()) {<br>
> MachineBasicBlock::iterator PI = prior(MBBI);<br>
> unsigned Opc = PI->getOpcode();<br>
> @@ -1037,6 +1051,7 @@<br>
><br>
> --MBBI;<br>
> }<br>
> + MachineBasicBlock::iterator FirstCSPop = MBBI;<br>
><br>
> DL = MBBI->getDebugLoc();<br>
><br>
> @@ -1045,40 +1060,19 @@<br>
> if (NumBytes || MFI->hasVarSizedObjects())<br>
> mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);<br>
><br>
> - // Restore the SP from the BP, if necessary.<br>
> - if (RegInfo->hasBasePointer(MF)) {<br>
> - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),<br>
> - StackPtr).addReg(BasePtr);<br>
> -<br>
> - // When restoring from the BP we must use a cached SP adjustment.<br>
> - NumBytes = X86FI->getBasePtrStackAdjustment();<br>
> - }<br>
> -<br>
> // If dynamic alloca is used, then reset esp to point to the last callee-saved<br>
> // slot before popping them off! Same applies for the case, when stack was<br>
> // realigned.<br>
> - if (RegInfo->needsStackRealignment(MF)) {<br>
> - // We cannot use LEA here, because stack pointer was realigned. We need to<br>
> - // deallocate local frame back.<br>
> - if (CSSize) {<br>
> - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,<br>
> - *RegInfo);<br>
> - MBBI = prior(LastCSPop);<br>
> - }<br>
> -<br>
> - BuildMI(MBB, MBBI, DL,<br>
> - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),<br>
> - StackPtr).addReg(FramePtr);<br>
> - } else if (MFI->hasVarSizedObjects()) {<br>
> - if (CSSize) {<br>
> - unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;<br>
> - MachineInstr *MI =<br>
> - addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),<br>
> - FramePtr, false, -CSSize);<br>
> - MBB.insert(MBBI, MI);<br>
> + if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {<br>
> + if (RegInfo->needsStackRealignment(MF))<br>
> + MBBI = FirstCSPop;<br>
> + if (CSSize != 0) {<br>
> + unsigned Opc = getLEArOpcode(Is64Bit);<br>
> + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),<br>
> + FramePtr, false, -CSSize);<br>
> } else {<br>
> - BuildMI(MBB, MBBI, DL,<br>
> - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)<br>
> + unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);<br>
> + BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)<br>
> .addReg(FramePtr);<br>
> }<br>
> } else if (NumBytes) {<br>
><br>
> Modified: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=160248&r1=160247&r2=160248&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=160248&r1=160247&r2=160248&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (original)<br>
> +++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll Mon Jul 16 01:54:09 2012<br>
> @@ -85,20 +85,19 @@<br>
> ; CHECK: _t4<br>
> ; CHECK: pushq %rbp<br>
> ; CHECK: movq %rsp, %rbp<br>
> -; CHECK: andq $-32, %rsp<br>
> ; CHECK: pushq %r14<br>
> ; CHECK: pushq %rbx<br>
> -; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp<br>
> +; CHECK: andq $-32, %rsp<br>
> +; CHECK: subq ${{[0-9]+}}, %rsp<br>
> ; CHECK: movq %rsp, %rbx<br>
> ;<br>
> ; CHECK: leaq {{[0-9]*}}(%rbx), %rdi<br>
> ; CHECK: leaq {{[0-9]*}}(%rbx), %rdx<br>
> ; CHECK: callq _t4_helper<br>
> ;<br>
> -; CHECK: addq $[[STACKADJ]], %rsp<br>
> +; CHECK: leaq -16(%rbp), %rsp<br>
> ; CHECK: popq %rbx<br>
> ; CHECK: popq %r14<br>
> -; CHECK: movq %rbp, %rsp<br>
> ; CHECK: popq %rbp<br>
> }<br>
><br>
> @@ -176,19 +175,17 @@<br>
> ; CHECK: _t7<br>
> ; CHECK: pushq %rbp<br>
> ; CHECK: movq %rsp, %rbp<br>
> -; CHECK: andq $-32, %rsp<br>
> ; CHECK: pushq %rbx<br>
> -; CHECK: subq $[[ADJ:[0-9]+]], %rsp<br>
> +; CHECK: andq $-32, %rsp<br>
> +; CHECK: subq ${{[0-9]+}}, %rsp<br>
> ; CHECK: movq %rsp, %rbx<br>
><br>
> ; Stack adjustment for byval<br>
> ; CHECK: subq {{.*}}, %rsp<br>
> ; CHECK: callq _bar<br>
> ; CHECK-NOT: addq {{.*}}, %rsp<br>
> -; CHECK: movq %rbx, %rsp<br>
> -; CHECK: addq $[[ADJ]], %rsp<br>
> +; CHECK: leaq -8(%rbp), %rsp<br>
> ; CHECK: popq %rbx<br>
> -; CHECK: movq %rbp, %rsp<br>
> ; CHECK: popq %rbp<br>
> }<br>
><br>
> @@ -229,14 +226,12 @@<br>
> ; FORCE-ALIGN: _t9<br>
> ; FORCE-ALIGN: pushq %rbp<br>
> ; FORCE-ALIGN: movq %rsp, %rbp<br>
> -; FORCE-ALIGN: andq $-32, %rsp<br>
> ; FORCE-ALIGN: pushq %rbx<br>
> -; FORCE-ALIGN: subq $24, %rsp<br>
> +; FORCE-ALIGN: andq $-32, %rsp<br>
> +; FORCE-ALIGN: subq $32, %rsp<br>
> ; FORCE-ALIGN: movq %rsp, %rbx<br>
><br>
> -; FORCE-ALIGN: movq %rbx, %rsp<br>
> -; FORCE-ALIGN: addq $24, %rsp<br>
> +; FORCE-ALIGN: leaq -8(%rbp), %rsp<br>
> ; FORCE-ALIGN: popq %rbx<br>
> -; FORCE-ALIGN: movq %rbp, %rsp<br>
> ; FORCE-ALIGN: popq %rbp<br>
> }<br>
><br>
> Modified: llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll?rev=160248&r1=160247&r2=160248&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll?rev=160248&r1=160247&r2=160248&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll (original)<br>
> +++ llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll Mon Jul 16 01:54:09 2012<br>
> @@ -19,10 +19,10 @@<br>
> ; CHECK: g:<br>
> ; CHECK: pushl %ebp<br>
> ; CHECK-NEXT: movl %esp, %ebp<br>
> -; CHECK-NEXT: andl $-32, %esp<br>
> ; CHECK-NEXT: pushl<br>
> ; CHECK-NEXT: pushl<br>
> -; CHECK-NEXT: subl $24, %esp<br>
> +; CHECK-NEXT: andl $-32, %esp<br>
> +; CHECK-NEXT: subl $32, %esp<br>
> ;<br>
> ; Now setup the base pointer (%ebx).<br>
> ; CHECK-NEXT: movl %esp, %ebx<br>
> @@ -46,17 +46,13 @@<br>
> ; CHECK-NEXT: addl $32, %esp<br>
> ; CHECK-NOT: {{[^ ,]*}}, %esp<br>
> ;<br>
> -; Restore %esp from %ebx (base pointer) so we can pop the callee-saved<br>
> -; registers. This is the state prior to the allocation of VLAs.<br>
> +; Restore %esp from %ebp (frame pointer) and subtract the size of<br>
> +; zone with callee-saved registers to pop them.<br>
> +; This is the state prior to stack realignment and the allocation of VLAs.<br>
> ; CHECK-NOT: popl<br>
> -; CHECK: movl %ebx, %esp<br>
> -; CHECK-NEXT: addl $24, %esp<br>
> +; CHECK: leal -8(%ebp), %esp<br>
> ; CHECK-NEXT: popl<br>
> ; CHECK-NEXT: popl<br>
> -;<br>
> -; Finally we need to restore %esp from %ebp due to dynamic stack<br>
> -; realignment.<br>
> -; CHECK-NEXT: movl %ebp, %esp<br>
> ; CHECK-NEXT: popl %ebp<br>
> ; CHECK-NEXT: ret<br>
><br>
><br>
> Added: llvm/trunk/test/CodeGen/X86/pr11468.ll<br>
> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11468.ll?rev=160248&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11468.ll?rev=160248&view=auto</a><br>
> ==============================================================================<br>
> --- llvm/trunk/test/CodeGen/X86/pr11468.ll (added)<br>
> +++ llvm/trunk/test/CodeGen/X86/pr11468.ll Mon Jul 16 01:54:09 2012<br>
> @@ -0,0 +1,33 @@<br>
> +; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s<br>
> +; PR11468<br>
> +<br>
> +define void @f(i64 %sz) uwtable {<br>
> +entry:<br>
> + %a = alloca i32, align 32<br>
> + store volatile i32 0, i32* %a, align 32<br>
> + ; force to push r14 on stack<br>
> + call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0<br>
> + ret void<br>
> +<br>
> +; CHECK: _f<br>
> +; CHECK: pushq %rbp<br>
> +; CHECK: .cfi_offset %rbp, -16<br>
> +; CHECK: movq %rsp, %rbp<br>
> +; CHECK: .cfi_def_cfa_register %rbp<br>
> +<br>
> +; We first push register on stack, and then realign it, so that<br>
> +; .cfi_offset value is correct<br>
> +; CHECK: pushq %r14<br>
> +; CHECK: andq $-32, %rsp<br>
> +; CHECK: .cfi_offset %r14, -24<br>
> +<br>
> +; Restore %rsp from %rbp and subtract the total size of saved regsiters.<br>
> +; CHECK: leaq -8(%rbp), %rsp<br>
> +<br>
> +; Pop saved registers.<br>
> +; CHECK: popq %r14<br>
> +; CHECK: popq %rbp<br>
> +}<br>
> +<br>
> +!0 = metadata !{i32 125}<br>
> +<br>
><br>
><br>
> _______________________________________________<br>
> llvm-commits mailing list<br>
> <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
> <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br><div>Alexey Samsonov, MSK</div><br>