[llvm-commits] [llvm] r158654 - in /llvm/trunk: lib/Target/X86/X86FrameLowering.cpp lib/Target/X86/X86RegisterInfo.cpp lib/Target/X86/X86RegisterInfo.h test/CodeGen/X86/alloca-align-rounding-32.ll test/CodeGen/X86/alloca-align-rounding.ll test/CodeGen/X86/dynamic-allocas-VLAs.ll

Mon Jun 18 00:03:13 PDT 2012

Author: chandlerc
Date: Mon Jun 18 02:03:12 2012
New Revision: 158654

URL: http://llvm.org/viewvc/llvm-project?rev=158654&view=rev
Log:
Temporarily revert r158087.

This patch causes problems when both dynamic stack realignment and
dynamic allocas combine in the same function. With this patch, we no
longer build the epilog correctly, and silently restore registers from
the wrong position in the stack.

Thanks to Matt for tracking this down, and getting at least an initial
test case to Chad. I'm going to try to check a variation of that test
case in so we can easily track the fixes required.

Removed:
    llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
Modified:
    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
    llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
    llvm/trunk/lib/Target/X86/X86RegisterInfo.h
    llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll
    llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll

Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=158654&r1=158653&r2=158654&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Jun 18 02:03:12 2012
@@ -650,7 +650,6 @@
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned StackPtr = RegInfo->getStackRegister();
-  unsigned BasePtr = RegInfo->getBaseRegister();
   DebugLoc DL;
 
   // If we're forcing a stack realignment we can't rely on just the frame
@@ -914,18 +913,6 @@
     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                  UseLEA, TII, *RegInfo);
 
-  // If we need a base pointer, set it up here. It's whatever the value
-  // of the stack pointer is at this point. Any variable size objects
-  // will be allocated after this, so we can still use the base pointer
-  // to reference locals.
-  if (RegInfo->hasBasePointer(MF)) {
-    // Update the frame pointer with the current stack pointer.
-    unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
-    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
-      .addReg(StackPtr)
-      .setMIFlag(MachineInstr::FrameSetup);
-  }
-
   if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
     // Mark end of stack pointer adjustment.
     MCSymbol *Label = MMI.getContext().CreateTempSymbol();
@@ -1161,16 +1148,7 @@
   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
   uint64_t StackSize = MFI->getStackSize();
 
-  if (RegInfo->hasBasePointer(MF)) {
-    assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
-    if (FI < 0) {
-      // Skip the saved EBP.
-      return Offset + RegInfo->getSlotSize();
-    } else {
-      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
-      return Offset + StackSize;
-    }
-  } else if (RegInfo->needsStackRealignment(MF)) {
+  if (RegInfo->needsStackRealignment(MF)) {
     if (FI < 0) {
       // Skip the saved EBP.
       return Offset + RegInfo->getSlotSize();
@@ -1201,14 +1179,9 @@
   const X86RegisterInfo *RegInfo =
       static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
   // We can't calculate offset from frame pointer if the stack is realigned,
-  // so enforce usage of stack/base pointer.  The base pointer is used when we
-  // have dynamic allocas in addition to dynamic realignment.
-  if (RegInfo->hasBasePointer(MF))
-    FrameReg = RegInfo->getBaseRegister();
-  else if (RegInfo->needsStackRealignment(MF))
-    FrameReg = RegInfo->getStackRegister();
-  else
-    FrameReg = RegInfo->getFrameRegister(MF);
+  // so enforce usage of stack pointer.
+  FrameReg = (RegInfo->needsStackRealignment(MF)) ? 
+    RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
   return getFrameIndexOffset(MF, FI);
 }
 
@@ -1345,10 +1318,6 @@
            "Slot for EBP register must be last in order to be found!");
     (void)FrameIdx;
   }
-
-  // Spill the BasePtr if it's used.
-  if (RegInfo->hasBasePointer(MF))
-    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
 }
 
 static bool

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=158654&r1=158653&r2=158654&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Jun 18 02:03:12 2012
@@ -50,10 +50,6 @@
                            " needed for the function."),
                  cl::init(false), cl::Hidden);
 
-cl::opt<bool>
-EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
-          cl::desc("Enable use of a base pointer for complex stack frames"));
-
 X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                  const TargetInstrInfo &tii)
   : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
@@ -72,12 +68,10 @@
     SlotSize = 8;
     StackPtr = X86::RSP;
     FramePtr = X86::RBP;
-    BasePtr = X86::RBX;
   } else {
     SlotSize = 4;
     StackPtr = X86::ESP;
     FramePtr = X86::EBP;
-    BasePtr = X86::EBX;
   }
 }
 
@@ -296,20 +290,6 @@
       Reserved.set(*I);
   }
 
-  // Set the base-pointer register and its aliases as reserved if needed.
-  if (hasBasePointer(MF)) {
-    CallingConv::ID CC = MF.getFunction()->getCallingConv();
-    const uint32_t* RegMask = getCallPreservedMask(CC);
-    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
-      report_fatal_error(
-        "Stack realignment in presence of dynamic allocas is not supported with"
-        "this calling convention.");
-
-    Reserved.set(getBaseRegister());
-    for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
-      Reserved.set(*I);
-  }
-
   // Mark the segment registers as reserved.
   Reserved.set(X86::CS);
   Reserved.set(X86::SS);
@@ -360,35 +340,10 @@
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-   const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-   if (!EnableBasePointer)
-     return false;
-
-   // When we need stack realignment and there are dynamic allocas, we can't 
-   // reference off of the stack pointer, so we reserve a base pointer.
-   if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
-     return true;
-
-   return false;
-}
-
 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const MachineRegisterInfo *MRI = &MF.getRegInfo();
-  if (!MF.getTarget().Options.RealignStack)
-    return false;
-
-  // Stack realignment requires a frame pointer.  If we already started
-  // register allocation with frame pointer elimination, it is too late now.
-  if (!MRI->canReserveReg(FramePtr))
-    return false;
-
-  // If base pointer is necessary.  Check that it isn't too late to reserve it.
-  if (MFI->hasVarSizedObjects())
-    return MRI->canReserveReg(BasePtr);
-  return true;
+  return (MF.getTarget().Options.RealignStack &&
+          !MFI->hasVarSizedObjects());
 }
 
 bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
@@ -398,6 +353,13 @@
   bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                                F->hasFnAttr(Attribute::StackAlignment));
 
+  // FIXME: Currently we don't support stack realignment for functions with
+  //        variable-sized allocas.
+  // FIXME: It's more complicated than this...
+  if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+    report_fatal_error(
+      "Stack realignment in presence of dynamic allocas is not supported");
+
   // If we've requested that we force align the stack do so now.
   if (ForceStackAlign)
     return canRealignStack(MF);
@@ -537,9 +499,7 @@
 
   unsigned Opc = MI.getOpcode();
   bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
-  if (hasBasePointer(MF))
-    BasePtr = getBaseRegister();
-  else if (needsStackRealignment(MF))
+  if (needsStackRealignment(MF))
     BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
   else if (AfterFPPop)
     BasePtr = StackPtr;

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=158654&r1=158653&r2=158654&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Mon Jun 18 02:03:12 2012
@@ -50,11 +50,6 @@
   ///
   unsigned FramePtr;
 
-  /// BasePtr - X86 physical register used as a base ptr in complex stack
-  /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
-  /// variable size stack objects.
-  unsigned BasePtr;
-
 public:
   X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
 
@@ -111,8 +106,6 @@
   /// register scavenger to determine what registers are free.
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasBasePointer(const MachineFunction &MF) const;
-
   bool canRealignStack(const MachineFunction &MF) const;
 
   bool needsStackRealignment(const MachineFunction &MF) const;
@@ -130,7 +123,6 @@
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const;
   unsigned getStackRegister() const { return StackPtr; }
-  unsigned getBaseRegister() const { return BasePtr; }
   // FIXME: Move to FrameInfok
   unsigned getSlotSize() const { return SlotSize; }
 

Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll?rev=158654&r1=158653&r2=158654&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll Mon Jun 18 02:03:12 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
 
 declare void @bar(<2 x i64>* %n)
 
@@ -6,15 +6,10 @@
   %p = alloca <2 x i64>, i32 %h
   call void @bar(<2 x i64>* %p)
   ret void
-; CHECK: foo
-; CHECK-NOT: andl $-32, %eax
 }
 
 define void @foo2(i32 %h) {
   %p = alloca <2 x i64>, i32 %h, align 32
   call void @bar(<2 x i64>* %p)
   ret void
-; CHECK: foo2
-; CHECK: andl $-32, %esp
-; CHECK: andl $-32, %eax
 }

Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll?rev=158654&r1=158653&r2=158654&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll (original)
+++ llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll Mon Jun 18 02:03:12 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
 
 declare void @bar(<2 x i64>* %n)
 
@@ -6,15 +6,10 @@
   %p = alloca <2 x i64>, i64 %h
   call void @bar(<2 x i64>* %p)
   ret void
-; CHECK: foo
-; CHECK-NOT: andq $-32, %rax
 }
 
 define void @foo2(i64 %h) {
   %p = alloca <2 x i64>, i64 %h, align 32
   call void @bar(<2 x i64>* %p)
   ret void
-; CHECK: foo2
-; CHECK: andq $-32, %rsp
-; CHECK: andq $-32, %rax
 }

Removed: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=158653&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (removed)
@@ -1,158 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
-; rdar://11496434
-
-; no VLAs or dynamic alignment
-define i32 @t1() nounwind uwtable ssp {
-entry:
-  %a = alloca i32, align 4
-  call void @t1_helper(i32* %a) nounwind
-  %0 = load i32* %a, align 4
-  %add = add nsw i32 %0, 13
-  ret i32 %add
-
-; CHECK: _t1
-; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
-; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
-; CHECK: callq _t1_helper
-; CHECK: movl [[OFFSET]](%rsp), %eax
-; CHECK: addl $13, %eax
-}
-
-declare void @t1_helper(i32*)
-
-; dynamic realignment
-define i32 @t2() nounwind uwtable ssp {
-entry:
-  %a = alloca i32, align 4
-  %v = alloca <8 x float>, align 32
-  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
-  %0 = load i32* %a, align 4
-  %add = add nsw i32 %0, 13
-  ret i32 %add
-
-; CHECK: _t2
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
-; CHECK: subq ${{[0-9]+}}, %rsp
-;
-; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
-; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
-; CHECK: callq _t2_helper
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
-}
-
-declare void @t2_helper(i32*, <8 x float>*)
-
-; VLAs
-define i32 @t3(i64 %sz) nounwind uwtable ssp {
-entry:
-  %a = alloca i32, align 4
-  %vla = alloca i32, i64 %sz, align 16
-  call void @t3_helper(i32* %a, i32* %vla) nounwind
-  %0 = load i32* %a, align 4
-  %add = add nsw i32 %0, 13
-  ret i32 %add
-
-; CHECK: _t3
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: pushq %rbx
-; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
-; CHECK: subq ${{[0-9]+}}, %rsp
-;
-; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
-; CHECK: popq %rbx
-; CHECK: popq %rbp
-}
-
-declare void @t3_helper(i32*, i32*)
-
-; VLAs + Dynamic realignment
-define i32 @t4(i64 %sz) nounwind uwtable ssp {
-entry:
-  %a = alloca i32, align 4
-  %v = alloca <8 x float>, align 32
-  %vla = alloca i32, i64 %sz, align 16
-  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
-  %0 = load i32* %a, align 4
-  %add = add nsw i32 %0, 13
-  ret i32 %add
-
-; CHECK: _t4
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
-; CHECK: pushq %r14
-; CHECK: pushq %rbx
-; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
-; CHECK: movq %rsp, %rbx
-;
-; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
-; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
-; CHECK: callq   _t4_helper
-;
-; CHECK: addq $[[STACKADJ]], %rsp
-; CHECK: popq %rbx
-; CHECK: popq %r14
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
-}
-
-declare void @t4_helper(i32*, i32*, <8 x float>*)
-
-; Dynamic realignment + Spill
-define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
-entry:
-  %a = alloca i32, align 4
-  %0 = bitcast float* %f to <8 x float>*
-  %1 = load <8 x float>* %0, align 32
-  call void @t5_helper1(i32* %a) nounwind
-  call void @t5_helper2(<8 x float> %1) nounwind
-  %2 = load i32* %a, align 4
-  %add = add nsw i32 %2, 13
-  ret i32 %add
-
-; CHECK: _t5
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
-; CHECK: subq ${{[0-9]+}}, %rsp
-;
-; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
-; CHECK: vmovaps [[AVXREG]], (%rsp)
-; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
-; CHECK: callq   _t5_helper1
-; CHECK: vmovaps (%rsp), %ymm0
-; CHECK: callq   _t5_helper2
-; CHECK: movl {{[0-9]+}}(%rsp), %eax
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
-}
-
-declare void @t5_helper1(i32*)
-
-declare void @t5_helper2(<8 x float>)
-
-; VLAs + Dynamic realignment + Spill
-; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
-define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
-entry:
-; CHECK: _t6
-  %a = alloca i32, align 4
-  %0 = bitcast float* %f to <8 x float>*
-  %1 = load <8 x float>* %0, align 32
-  %vla = alloca i32, i64 %sz, align 16
-  call void @t6_helper1(i32* %a, i32* %vla) nounwind
-  call void @t6_helper2(<8 x float> %1) nounwind
-  %2 = load i32* %a, align 4
-  %add = add nsw i32 %2, 13
-  ret i32 %add
-}
-
-declare void @t6_helper1(i32*, i32*)
-
-declare void @t6_helper2(<8 x float>)