[llvm-commits] [llvm] r160248 - in /llvm/trunk: lib/Target/X86/X86FrameLowering.cpp test/CodeGen/X86/dynamic-allocas-VLAs.ll test/CodeGen/X86/force-align-stack-alloca.ll test/CodeGen/X86/pr11468.ll

Alexey Samsonov samsonov at google.com
Sun Jul 15 23:54:09 PDT 2012


Author: samsonov
Date: Mon Jul 16 01:54:09 2012
New Revision: 160248

URL: http://llvm.org/viewvc/llvm-project?rev=160248&view=rev
Log:
This CL changes the function prologue and epilogue emitted on X86 when stack needs realignment.
It is intended to fix PR11468.

Old prologue and epilogue looked like this:
push %rbp
mov %rsp, %rbp
and $alignment, %rsp
push %r14
push %r15
...
pop %r15
pop %r14
mov %rbp, %rsp
pop %rbp

The problem was to reference the locations of callee-saved registers in exception handling:
locations of callee-saved had to be re-calculated regarding the stack alignment operation. It would
take some effort to implement this in LLVM, as currently MachineLocation can only have the form
"Register + Offset". Funciton prologue and epilogue are now changed to:

push %rbp
mov %rsp, %rbp
push %14
push %15
and $alignment, %rsp
...
lea -$size_of_saved_registers(%rbp), %rsp
pop %r15
pop %r14
pop %rbp

Reviewed by Chad Rosier.

Added:
    llvm/trunk/test/CodeGen/X86/pr11468.ll
Modified:
    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
    llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
    llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll

Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=160248&r1=160247&r2=160248&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Jul 16 01:54:09 2012
@@ -722,10 +722,14 @@
   if (HasFP) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
-    if (RegInfo->needsStackRealignment(MF))
-      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
-
-    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+    if (RegInfo->needsStackRealignment(MF)) {
+      // Callee-saved registers are pushed on stack before the stack
+      // is realigned.
+      FrameSize -= X86FI->getCalleeSavedFrameSize();
+      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+    } else {
+      NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+    }
 
     // Get the offset of the stack slot for the EBP register, which is
     // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
@@ -782,19 +786,6 @@
     for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
          I != E; ++I)
       I->addLiveIn(FramePtr);
-
-    // Realign stack
-    if (RegInfo->needsStackRealignment(MF)) {
-      MachineInstr *MI =
-        BuildMI(MBB, MBBI, DL,
-                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
-        .addReg(StackPtr)
-        .addImm(-MaxAlign)
-        .setMIFlag(MachineInstr::FrameSetup);
-
-      // The EFLAGS implicit def is dead.
-      MI->getOperand(3).setIsDead();
-    }
   } else {
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
@@ -824,6 +815,27 @@
     }
   }
 
+  // Realign stack after we pushed callee-saved registers (so that we'll be
+  // able to calculate their offsets from the frame pointer).
+
+  // NOTE: We push the registers before realigning the stack, so
+  // vector callee-saved (xmm) registers may be saved w/o proper
+  // alignment in this way. However, currently these regs are saved in
+  // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
+  // this shouldn't be a problem.
+  if (RegInfo->needsStackRealignment(MF)) {
+    assert(HasFP && "There should be a frame pointer if stack is realigned.");
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL,
+              TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+      .addReg(StackPtr)
+      .addImm(-MaxAlign)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+    // The EFLAGS implicit def is dead.
+    MI->getOperand(3).setIsDead();
+  }
+
   DL = MBB.findDebugLoc(MBBI);
 
   // If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -975,7 +987,6 @@
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned StackPtr = RegInfo->getStackRegister();
-  unsigned BasePtr = RegInfo->getBaseRegister();
 
   switch (RetOpcode) {
   default:
@@ -1013,10 +1024,14 @@
   if (hasFP(MF)) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
-    if (RegInfo->needsStackRealignment(MF))
-      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
-
-    NumBytes = FrameSize - CSSize;
+    if (RegInfo->needsStackRealignment(MF)) {
+      // Callee-saved registers were pushed on stack before the stack
+      // was realigned.
+      FrameSize -= CSSize;
+      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+    } else {
+      NumBytes = FrameSize - CSSize;
+    }
 
     // Pop EBP.
     BuildMI(MBB, MBBI, DL,
@@ -1026,7 +1041,6 @@
   }
 
   // Skip the callee-saved pop instructions.
-  MachineBasicBlock::iterator LastCSPop = MBBI;
   while (MBBI != MBB.begin()) {
     MachineBasicBlock::iterator PI = prior(MBBI);
     unsigned Opc = PI->getOpcode();
@@ -1037,6 +1051,7 @@
 
     --MBBI;
   }
+  MachineBasicBlock::iterator FirstCSPop = MBBI;
 
   DL = MBBI->getDebugLoc();
 
@@ -1045,40 +1060,19 @@
   if (NumBytes || MFI->hasVarSizedObjects())
     mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
 
-  // Restore the SP from the BP, if necessary.
-  if (RegInfo->hasBasePointer(MF)) {
-    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
-            StackPtr).addReg(BasePtr);
-
-    // When restoring from the BP we must use a cached SP adjustment.
-    NumBytes = X86FI->getBasePtrStackAdjustment();
-  }
-
   // If dynamic alloca is used, then reset esp to point to the last callee-saved
   // slot before popping them off! Same applies for the case, when stack was
   // realigned.
-  if (RegInfo->needsStackRealignment(MF)) {
-    // We cannot use LEA here, because stack pointer was realigned. We need to
-    // deallocate local frame back.
-    if (CSSize) {
-      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
-                   *RegInfo);
-      MBBI = prior(LastCSPop);
-    }
-
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
-            StackPtr).addReg(FramePtr);
-  } else if (MFI->hasVarSizedObjects()) {
-    if (CSSize) {
-      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
-      MachineInstr *MI =
-        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
-                     FramePtr, false, -CSSize);
-      MBB.insert(MBBI, MI);
+  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+    if (RegInfo->needsStackRealignment(MF))
+      MBBI = FirstCSPop;
+    if (CSSize != 0) {
+      unsigned Opc = getLEArOpcode(Is64Bit);
+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+                   FramePtr, false, -CSSize);
     } else {
-      BuildMI(MBB, MBBI, DL,
-              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+      unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
         .addReg(FramePtr);
     }
   } else if (NumBytes) {

Modified: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=160248&r1=160247&r2=160248&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll Mon Jul 16 01:54:09 2012
@@ -85,20 +85,19 @@
 ; CHECK: _t4
 ; CHECK: pushq %rbp
 ; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
 ; CHECK: pushq %r14
 ; CHECK: pushq %rbx
-; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
 ; CHECK: movq %rsp, %rbx
 ;
 ; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
 ; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
 ; CHECK: callq   _t4_helper
 ;
-; CHECK: addq $[[STACKADJ]], %rsp
+; CHECK: leaq -16(%rbp), %rsp
 ; CHECK: popq %rbx
 ; CHECK: popq %r14
-; CHECK: movq %rbp, %rsp
 ; CHECK: popq %rbp
 }
 
@@ -176,19 +175,17 @@
 ; CHECK: _t7
 ; CHECK:     pushq %rbp
 ; CHECK:     movq %rsp, %rbp
-; CHECK:     andq $-32, %rsp
 ; CHECK:     pushq %rbx
-; CHECK:     subq $[[ADJ:[0-9]+]], %rsp
+; CHECK:     andq $-32, %rsp
+; CHECK:     subq ${{[0-9]+}}, %rsp
 ; CHECK:     movq %rsp, %rbx
 
 ; Stack adjustment for byval
 ; CHECK:     subq {{.*}}, %rsp
 ; CHECK:     callq _bar
 ; CHECK-NOT: addq {{.*}}, %rsp
-; CHECK:     movq %rbx, %rsp
-; CHECK:     addq $[[ADJ]], %rsp
+; CHECK:     leaq -8(%rbp), %rsp
 ; CHECK:     popq %rbx
-; CHECK:     movq %rbp, %rsp
 ; CHECK:     popq %rbp
 }
 
@@ -229,14 +226,12 @@
 ; FORCE-ALIGN: _t9
 ; FORCE-ALIGN: pushq %rbp
 ; FORCE-ALIGN: movq %rsp, %rbp
-; FORCE-ALIGN: andq $-32, %rsp
 ; FORCE-ALIGN: pushq %rbx
-; FORCE-ALIGN: subq $24, %rsp
+; FORCE-ALIGN: andq $-32, %rsp
+; FORCE-ALIGN: subq $32, %rsp
 ; FORCE-ALIGN: movq %rsp, %rbx
 
-; FORCE-ALIGN: movq %rbx, %rsp
-; FORCE-ALIGN: addq $24, %rsp
+; FORCE-ALIGN: leaq -8(%rbp), %rsp
 ; FORCE-ALIGN: popq %rbx
-; FORCE-ALIGN: movq %rbp, %rsp
 ; FORCE-ALIGN: popq %rbp
 }

Modified: llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll?rev=160248&r1=160247&r2=160248&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll (original)
+++ llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll Mon Jul 16 01:54:09 2012
@@ -19,10 +19,10 @@
 ; CHECK: g:
 ; CHECK:      pushl  %ebp
 ; CHECK-NEXT: movl   %esp, %ebp
-; CHECK-NEXT: andl   $-32, %esp
 ; CHECK-NEXT: pushl
 ; CHECK-NEXT: pushl
-; CHECK-NEXT: subl   $24, %esp
+; CHECK-NEXT: andl   $-32, %esp
+; CHECK-NEXT: subl   $32, %esp
 ;
 ; Now setup the base pointer (%ebx).
 ; CHECK-NEXT: movl   %esp, %ebx
@@ -46,17 +46,13 @@
 ; CHECK-NEXT: addl   $32, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ;
-; Restore %esp from %ebx (base pointer) so we can pop the callee-saved
-; registers.  This is the state prior to the allocation of VLAs.
+; Restore %esp from %ebp (frame pointer) and subtract the size of
+; zone with callee-saved registers to pop them.
+; This is the state prior to stack realignment and the allocation of VLAs.
 ; CHECK-NOT:  popl
-; CHECK:      movl   %ebx, %esp
-; CHECK-NEXT: addl   $24, %esp
+; CHECK:      leal   -8(%ebp), %esp
 ; CHECK-NEXT: popl
 ; CHECK-NEXT: popl
-;
-; Finally we need to restore %esp from %ebp due to dynamic stack
-; realignment.
-; CHECK-NEXT: movl   %ebp, %esp
 ; CHECK-NEXT: popl   %ebp
 ; CHECK-NEXT: ret
 

Added: llvm/trunk/test/CodeGen/X86/pr11468.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11468.ll?rev=160248&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr11468.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr11468.ll Mon Jul 16 01:54:09 2012
@@ -0,0 +1,33 @@
+; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; PR11468
+
+define void @f(i64 %sz) uwtable {
+entry:
+  %a = alloca i32, align 32
+  store volatile i32 0, i32* %a, align 32
+  ; force to push r14 on stack
+  call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
+  ret void
+
+; CHECK: _f
+; CHECK: pushq %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: movq %rsp, %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+
+; We first push register on stack, and then realign it, so that
+; .cfi_offset value is correct
+; CHECK: pushq %r14
+; CHECK: andq $-32, %rsp
+; CHECK: .cfi_offset %r14, -24
+
+; Restore %rsp from %rbp and subtract the total size of saved regsiters.
+; CHECK: leaq -8(%rbp), %rsp
+
+; Pop saved registers.
+; CHECK: popq %r14
+; CHECK: popq %rbp
+}
+
+!0 = metadata !{i32 125}
+





More information about the llvm-commits mailing list