[llvm-commits] [llvm] r158087 - in /llvm/trunk: lib/Target/X86/X86FrameLowering.cpp lib/Target/X86/X86RegisterInfo.cpp lib/Target/X86/X86RegisterInfo.h test/CodeGen/X86/alloca-align-rounding-32.ll test/CodeGen/X86/alloca-align-rounding.ll test/Co

Thu Jun 14 10:51:50 PDT 2012

Hi Chad,

This is causing some breakage. In functions with stack realignment and
dynamic allocas (and possibly some other conditions that I don't yet
fully understand), we generate an epilog that adds a constant to %rsp
rather than recalculating it relative to %rbp before popping
callee-save registers. I don't have a small test case yet, but I
wanted to give you a heads up.

-Matt

On Wed, Jun 6, 2012 at 10:37 AM, Chad Rosier <mcrosier at apple.com> wrote:
> Author: mcrosier
> Date: Wed Jun  6 12:37:40 2012
> New Revision: 158087
>
> URL: http://llvm.org/viewvc/llvm-project?rev=158087&view=rev
> Log:
> Add support for dynamic stack realignment in the presence of dynamic allocas on
> X86.
> rdar://11496434
>
> Added:
>    llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
> Modified:
>    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
>    llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
>    llvm/trunk/lib/Target/X86/X86RegisterInfo.h
>    llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll
>    llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=158087&r1=158086&r2=158087&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Wed Jun  6 12:37:40 2012
> @@ -650,6 +650,7 @@
>   unsigned SlotSize = RegInfo->getSlotSize();
>   unsigned FramePtr = RegInfo->getFrameRegister(MF);
>   unsigned StackPtr = RegInfo->getStackRegister();
> +  unsigned BasePtr = RegInfo->getBaseRegister();
>   DebugLoc DL;
>
>   // If we're forcing a stack realignment we can't rely on just the frame
> @@ -913,6 +914,18 @@
>     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
>                  UseLEA, TII, *RegInfo);
>
> +  // If we need a base pointer, set it up here. It's whatever the value
> +  // of the stack pointer is at this point. Any variable size objects
> +  // will be allocated after this, so we can still use the base pointer
> +  // to reference locals.
> +  if (RegInfo->hasBasePointer(MF)) {
> +    // Update the frame pointer with the current stack pointer.
> +    unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
> +    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
> +      .addReg(StackPtr)
> +      .setMIFlag(MachineInstr::FrameSetup);
> +  }
> +
>   if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
>     // Mark end of stack pointer adjustment.
>     MCSymbol *Label = MMI.getContext().CreateTempSymbol();
> @@ -1148,7 +1161,16 @@
>   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
>   uint64_t StackSize = MFI->getStackSize();
>
> -  if (RegInfo->needsStackRealignment(MF)) {
> +  if (RegInfo->hasBasePointer(MF)) {
> +    assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
> +    if (FI < 0) {
> +      // Skip the saved EBP.
> +      return Offset + RegInfo->getSlotSize();
> +    } else {
> +      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
> +      return Offset + StackSize;
> +    }
> +  } else if (RegInfo->needsStackRealignment(MF)) {
>     if (FI < 0) {
>       // Skip the saved EBP.
>       return Offset + RegInfo->getSlotSize();
> @@ -1179,9 +1201,14 @@
>   const X86RegisterInfo *RegInfo =
>       static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
>   // We can't calculate offset from frame pointer if the stack is realigned,
> -  // so enforce usage of stack pointer.
> -  FrameReg = (RegInfo->needsStackRealignment(MF)) ?
> -    RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
> +  // so enforce usage of stack/base pointer.  The base pointer is used when we
> +  // have dynamic allocas in addition to dynamic realignment.
> +  if (RegInfo->hasBasePointer(MF))
> +    FrameReg = RegInfo->getBaseRegister();
> +  else if (RegInfo->needsStackRealignment(MF))
> +    FrameReg = RegInfo->getStackRegister();
> +  else
> +    FrameReg = RegInfo->getFrameRegister(MF);
>   return getFrameIndexOffset(MF, FI);
>  }
>
> @@ -1318,6 +1345,10 @@
>            "Slot for EBP register must be last in order to be found!");
>     (void)FrameIdx;
>   }
> +
> +  // Spill the BasePtr if it's used.
> +  if (RegInfo->hasBasePointer(MF))
> +    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
>  }
>
>  static bool
>
> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=158087&r1=158086&r2=158087&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Jun  6 12:37:40 2012
> @@ -50,6 +50,10 @@
>                            " needed for the function."),
>                  cl::init(false), cl::Hidden);
>
> +cl::opt<bool>
> +EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
> +          cl::desc("Enable use of a base pointer for complex stack frames"));
> +
>  X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
>                                  const TargetInstrInfo &tii)
>   : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
> @@ -68,10 +72,12 @@
>     SlotSize = 8;
>     StackPtr = X86::RSP;
>     FramePtr = X86::RBP;
> +    BasePtr = X86::RBX;
>   } else {
>     SlotSize = 4;
>     StackPtr = X86::ESP;
>     FramePtr = X86::EBP;
> +    BasePtr = X86::EBX;
>   }
>  }
>
> @@ -290,6 +296,20 @@
>       Reserved.set(*I);
>   }
>
> +  // Set the base-pointer register and its aliases as reserved if needed.
> +  if (hasBasePointer(MF)) {
> +    CallingConv::ID CC = MF.getFunction()->getCallingConv();
> +    const uint32_t* RegMask = getCallPreservedMask(CC);
> +    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
> +      report_fatal_error(
> +        "Stack realignment in presence of dynamic allocas is not supported with"
> +        "this calling convention.");
> +
> +    Reserved.set(getBaseRegister());
> +    for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
> +      Reserved.set(*I);
> +  }
> +
>   // Mark the segment registers as reserved.
>   Reserved.set(X86::CS);
>   Reserved.set(X86::SS);
> @@ -340,10 +360,35 @@
>  // Stack Frame Processing methods
>  //===----------------------------------------------------------------------===//
>
> +bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
> +   const MachineFrameInfo *MFI = MF.getFrameInfo();
> +
> +   if (!EnableBasePointer)
> +     return false;
> +
> +   // When we need stack realignment and there are dynamic allocas, we can't
> +   // reference off of the stack pointer, so we reserve a base pointer.
> +   if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
> +     return true;
> +
> +   return false;
> +}
> +
>  bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
>   const MachineFrameInfo *MFI = MF.getFrameInfo();
> -  return (MF.getTarget().Options.RealignStack &&
> -          !MFI->hasVarSizedObjects());
> +  const MachineRegisterInfo *MRI = &MF.getRegInfo();
> +  if (!MF.getTarget().Options.RealignStack)
> +    return false;
> +
> +  // Stack realignment requires a frame pointer.  If we already started
> +  // register allocation with frame pointer elimination, it is too late now.
> +  if (!MRI->canReserveReg(FramePtr))
> +    return false;
> +
> +  // If base pointer is necessary.  Check that it isn't too late to reserve it.
> +  if (MFI->hasVarSizedObjects())
> +    return MRI->canReserveReg(BasePtr);
> +  return true;
>  }
>
>  bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
> @@ -353,13 +398,6 @@
>   bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
>                                F->hasFnAttr(Attribute::StackAlignment));
>
> -  // FIXME: Currently we don't support stack realignment for functions with
> -  //        variable-sized allocas.
> -  // FIXME: It's more complicated than this...
> -  if (0 && requiresRealignment && MFI->hasVarSizedObjects())
> -    report_fatal_error(
> -      "Stack realignment in presence of dynamic allocas is not supported");
> -
>   // If we've requested that we force align the stack do so now.
>   if (ForceStackAlign)
>     return canRealignStack(MF);
> @@ -499,7 +537,9 @@
>
>   unsigned Opc = MI.getOpcode();
>   bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
> -  if (needsStackRealignment(MF))
> +  if (hasBasePointer(MF))
> +    BasePtr = getBaseRegister();
> +  else if (needsStackRealignment(MF))
>     BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
>   else if (AfterFPPop)
>     BasePtr = StackPtr;
>
> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=158087&r1=158086&r2=158087&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Jun  6 12:37:40 2012
> @@ -50,6 +50,11 @@
>   ///
>   unsigned FramePtr;
>
> +  /// BasePtr - X86 physical register used as a base ptr in complex stack
> +  /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
> +  /// variable size stack objects.
> +  unsigned BasePtr;
> +
>  public:
>   X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
>
> @@ -106,6 +111,8 @@
>   /// register scavenger to determine what registers are free.
>   BitVector getReservedRegs(const MachineFunction &MF) const;
>
> +  bool hasBasePointer(const MachineFunction &MF) const;
> +
>   bool canRealignStack(const MachineFunction &MF) const;
>
>   bool needsStackRealignment(const MachineFunction &MF) const;
> @@ -123,6 +130,7 @@
>   // Debug information queries.
>   unsigned getFrameRegister(const MachineFunction &MF) const;
>   unsigned getStackRegister() const { return StackPtr; }
> +  unsigned getBaseRegister() const { return BasePtr; }
>   // FIXME: Move to FrameInfok
>   unsigned getSlotSize() const { return SlotSize; }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll?rev=158087&r1=158086&r2=158087&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll Wed Jun  6 12:37:40 2012
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
> +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
>
>  declare void @bar(<2 x i64>* %n)
>
> @@ -6,10 +6,15 @@
>   %p = alloca <2 x i64>, i32 %h
>   call void @bar(<2 x i64>* %p)
>   ret void
> +; CHECK: foo
> +; CHECK-NOT: andl $-32, %eax
>  }
>
>  define void @foo2(i32 %h) {
>   %p = alloca <2 x i64>, i32 %h, align 32
>   call void @bar(<2 x i64>* %p)
>   ret void
> +; CHECK: foo2
> +; CHECK: andl $-32, %esp
> +; CHECK: andl $-32, %eax
>  }
>
> Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll?rev=158087&r1=158086&r2=158087&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll Wed Jun  6 12:37:40 2012
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
> +; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
>
>  declare void @bar(<2 x i64>* %n)
>
> @@ -6,10 +6,15 @@
>   %p = alloca <2 x i64>, i64 %h
>   call void @bar(<2 x i64>* %p)
>   ret void
> +; CHECK: foo
> +; CHECK-NOT: andq $-32, %rax
>  }
>
>  define void @foo2(i64 %h) {
>   %p = alloca <2 x i64>, i64 %h, align 32
>   call void @bar(<2 x i64>* %p)
>   ret void
> +; CHECK: foo2
> +; CHECK: andq $-32, %rsp
> +; CHECK: andq $-32, %rax
>  }
>
> Added: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=158087&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll Wed Jun  6 12:37:40 2012
> @@ -0,0 +1,158 @@
> +; RUN: llc < %s -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
> +; rdar://11496434
> +
> +; no VLAs or dynamic alignment
> +define i32 @t1() nounwind uwtable ssp {
> +entry:
> +  %a = alloca i32, align 4
> +  call void @t1_helper(i32* %a) nounwind
> +  %0 = load i32* %a, align 4
> +  %add = add nsw i32 %0, 13
> +  ret i32 %add
> +
> +; CHECK: _t1
> +; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
> +; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
> +; CHECK: callq _t1_helper
> +; CHECK: movl [[OFFSET]](%rsp), %eax
> +; CHECK: addl $13, %eax
> +}
> +
> +declare void @t1_helper(i32*)
> +
> +; dynamic realignment
> +define i32 @t2() nounwind uwtable ssp {
> +entry:
> +  %a = alloca i32, align 4
> +  %v = alloca <8 x float>, align 32
> +  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
> +  %0 = load i32* %a, align 4
> +  %add = add nsw i32 %0, 13
> +  ret i32 %add
> +
> +; CHECK: _t2
> +; CHECK: pushq %rbp
> +; CHECK: movq %rsp, %rbp
> +; CHECK: andq $-32, %rsp
> +; CHECK: subq ${{[0-9]+}}, %rsp
> +;
> +; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
> +; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
> +; CHECK: callq _t2_helper
> +;
> +; CHECK: movq %rbp, %rsp
> +; CHECK: popq %rbp
> +}
> +
> +declare void @t2_helper(i32*, <8 x float>*)
> +
> +; VLAs
> +define i32 @t3(i64 %sz) nounwind uwtable ssp {
> +entry:
> +  %a = alloca i32, align 4
> +  %vla = alloca i32, i64 %sz, align 16
> +  call void @t3_helper(i32* %a, i32* %vla) nounwind
> +  %0 = load i32* %a, align 4
> +  %add = add nsw i32 %0, 13
> +  ret i32 %add
> +
> +; CHECK: _t3
> +; CHECK: pushq %rbp
> +; CHECK: movq %rsp, %rbp
> +; CHECK: pushq %rbx
> +; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
> +; CHECK: subq ${{[0-9]+}}, %rsp
> +;
> +; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
> +; CHECK: popq %rbx
> +; CHECK: popq %rbp
> +}
> +
> +declare void @t3_helper(i32*, i32*)
> +
> +; VLAs + Dynamic realignment
> +define i32 @t4(i64 %sz) nounwind uwtable ssp {
> +entry:
> +  %a = alloca i32, align 4
> +  %v = alloca <8 x float>, align 32
> +  %vla = alloca i32, i64 %sz, align 16
> +  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
> +  %0 = load i32* %a, align 4
> +  %add = add nsw i32 %0, 13
> +  ret i32 %add
> +
> +; CHECK: _t4
> +; CHECK: pushq %rbp
> +; CHECK: movq %rsp, %rbp
> +; CHECK: andq $-32, %rsp
> +; CHECK: pushq %r14
> +; CHECK: pushq %rbx
> +; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
> +; CHECK: movq %rsp, %rbx
> +;
> +; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
> +; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
> +; CHECK: callq   _t4_helper
> +;
> +; CHECK: addq $[[STACKADJ]], %rsp
> +; CHECK: popq %rbx
> +; CHECK: popq %r14
> +; CHECK: movq %rbp, %rsp
> +; CHECK: popq %rbp
> +}
> +
> +declare void @t4_helper(i32*, i32*, <8 x float>*)
> +
> +; Dynamic realignment + Spill
> +define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
> +entry:
> +  %a = alloca i32, align 4
> +  %0 = bitcast float* %f to <8 x float>*
> +  %1 = load <8 x float>* %0, align 32
> +  call void @t5_helper1(i32* %a) nounwind
> +  call void @t5_helper2(<8 x float> %1) nounwind
> +  %2 = load i32* %a, align 4
> +  %add = add nsw i32 %2, 13
> +  ret i32 %add
> +
> +; CHECK: _t5
> +; CHECK: pushq %rbp
> +; CHECK: movq %rsp, %rbp
> +; CHECK: andq $-32, %rsp
> +; CHECK: subq ${{[0-9]+}}, %rsp
> +;
> +; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
> +; CHECK: vmovaps [[AVXREG]], (%rsp)
> +; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
> +; CHECK: callq   _t5_helper1
> +; CHECK: vmovaps (%rsp), %ymm0
> +; CHECK: callq   _t5_helper2
> +; CHECK: movl {{[0-9]+}}(%rsp), %eax
> +;
> +; CHECK: movq %rbp, %rsp
> +; CHECK: popq %rbp
> +}
> +
> +declare void @t5_helper1(i32*)
> +
> +declare void @t5_helper2(<8 x float>)
> +
> +; VLAs + Dynamic realignment + Spill
> +; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
> +define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
> +entry:
> +; CHECK: _t6
> +  %a = alloca i32, align 4
> +  %0 = bitcast float* %f to <8 x float>*
> +  %1 = load <8 x float>* %0, align 32
> +  %vla = alloca i32, i64 %sz, align 16
> +  call void @t6_helper1(i32* %a, i32* %vla) nounwind
> +  call void @t6_helper2(<8 x float> %1) nounwind
> +  %2 = load i32* %a, align 4
> +  %add = add nsw i32 %2, 13
> +  ret i32 %add
> +}
> +
> +declare void @t6_helper1(i32*, i32*)
> +
> +declare void @t6_helper2(<8 x float>)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits