[llvm-commits] [llvm] r158087 - in /llvm/trunk: lib/Target/X86/X86FrameLowering.cpp lib/Target/X86/X86RegisterInfo.cpp lib/Target/X86/X86RegisterInfo.h test/CodeGen/X86/alloca-align-rounding-32.ll test/CodeGen/X86/alloca-align-rounding.ll test/CodeGen/X86/dynamic-allocas-VLAs.ll
Chad Rosier
mcrosier at apple.com
Thu Jun 14 12:15:48 PDT 2012
Hi Matt,
I'd be happy to investigate given a test case. I'm at WWDC today, but should be able to take a look tomorrow.
Chad
On Jun 14, 2012, at 10:51 AM, Matt Beaumont-Gay <matthewbg at google.com> wrote:
> Hi Chad,
>
> This is causing some breakage. In functions with stack realignment and
> dynamic allocas (and possibly some other conditions that I don't yet
> fully understand), we generate an epilog that adds a constant to %rsp
> rather than recalculating it relative to %rbp before popping
> callee-save registers. I don't have a small test case yet, but I
> wanted to give you a heads up.
>
> -Matt
>
> On Wed, Jun 6, 2012 at 10:37 AM, Chad Rosier <mcrosier at apple.com> wrote:
>> Author: mcrosier
>> Date: Wed Jun 6 12:37:40 2012
>> New Revision: 158087
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=158087&view=rev
>> Log:
>> Add support for dynamic stack realignment in the presence of dynamic allocas on
>> X86.
>> rdar://11496434
>>
>> Added:
>> llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
>> Modified:
>> llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
>> llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
>> llvm/trunk/lib/Target/X86/X86RegisterInfo.h
>> llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll
>> llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll
>>
>> Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=158087&r1=158086&r2=158087&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Wed Jun 6 12:37:40 2012
>> @@ -650,6 +650,7 @@
>> unsigned SlotSize = RegInfo->getSlotSize();
>> unsigned FramePtr = RegInfo->getFrameRegister(MF);
>> unsigned StackPtr = RegInfo->getStackRegister();
>> + unsigned BasePtr = RegInfo->getBaseRegister();
>> DebugLoc DL;
>>
>> // If we're forcing a stack realignment we can't rely on just the frame
>> @@ -913,6 +914,18 @@
>> emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
>> UseLEA, TII, *RegInfo);
>>
>> + // If we need a base pointer, set it up here. It's whatever the value
>> + // of the stack pointer is at this point. Any variable size objects
>> + // will be allocated after this, so we can still use the base pointer
>> + // to reference locals.
>> + if (RegInfo->hasBasePointer(MF)) {
>> + // Update the base pointer with the current stack pointer.
>> + unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
>> + BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
>> + .addReg(StackPtr)
>> + .setMIFlag(MachineInstr::FrameSetup);
>> + }
>> +
>> if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
>> // Mark end of stack pointer adjustment.
>> MCSymbol *Label = MMI.getContext().CreateTempSymbol();
>> @@ -1148,7 +1161,16 @@
>> int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
>> uint64_t StackSize = MFI->getStackSize();
>>
>> - if (RegInfo->needsStackRealignment(MF)) {
>> + if (RegInfo->hasBasePointer(MF)) {
>> + assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
>> + if (FI < 0) {
>> + // Skip the saved EBP.
>> + return Offset + RegInfo->getSlotSize();
>> + } else {
>> + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
>> + return Offset + StackSize;
>> + }
>> + } else if (RegInfo->needsStackRealignment(MF)) {
>> if (FI < 0) {
>> // Skip the saved EBP.
>> return Offset + RegInfo->getSlotSize();
>> @@ -1179,9 +1201,14 @@
>> const X86RegisterInfo *RegInfo =
>> static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
>> // We can't calculate offset from frame pointer if the stack is realigned,
>> - // so enforce usage of stack pointer.
>> - FrameReg = (RegInfo->needsStackRealignment(MF)) ?
>> - RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
>> + // so enforce usage of stack/base pointer. The base pointer is used when we
>> + // have dynamic allocas in addition to dynamic realignment.
>> + if (RegInfo->hasBasePointer(MF))
>> + FrameReg = RegInfo->getBaseRegister();
>> + else if (RegInfo->needsStackRealignment(MF))
>> + FrameReg = RegInfo->getStackRegister();
>> + else
>> + FrameReg = RegInfo->getFrameRegister(MF);
>> return getFrameIndexOffset(MF, FI);
>> }
>>
>> @@ -1318,6 +1345,10 @@
>> "Slot for EBP register must be last in order to be found!");
>> (void)FrameIdx;
>> }
>> +
>> + // Spill the BasePtr if it's used.
>> + if (RegInfo->hasBasePointer(MF))
>> + MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
>> }
>>
>> static bool
>>
>> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=158087&r1=158086&r2=158087&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Jun 6 12:37:40 2012
>> @@ -50,6 +50,10 @@
>> " needed for the function."),
>> cl::init(false), cl::Hidden);
>>
>> +cl::opt<bool>
>> +EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
>> + cl::desc("Enable use of a base pointer for complex stack frames"));
>> +
>> X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
>> const TargetInstrInfo &tii)
>> : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
>> @@ -68,10 +72,12 @@
>> SlotSize = 8;
>> StackPtr = X86::RSP;
>> FramePtr = X86::RBP;
>> + BasePtr = X86::RBX;
>> } else {
>> SlotSize = 4;
>> StackPtr = X86::ESP;
>> FramePtr = X86::EBP;
>> + BasePtr = X86::EBX;
>> }
>> }
>>
>> @@ -290,6 +296,20 @@
>> Reserved.set(*I);
>> }
>>
>> + // Set the base-pointer register and its aliases as reserved if needed.
>> + if (hasBasePointer(MF)) {
>> + CallingConv::ID CC = MF.getFunction()->getCallingConv();
>> + const uint32_t* RegMask = getCallPreservedMask(CC);
>> + if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
>> + report_fatal_error(
>> + "Stack realignment in presence of dynamic allocas is not supported with"
>> + "this calling convention.");
>> +
>> + Reserved.set(getBaseRegister());
>> + for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
>> + Reserved.set(*I);
>> + }
>> +
>> // Mark the segment registers as reserved.
>> Reserved.set(X86::CS);
>> Reserved.set(X86::SS);
>> @@ -340,10 +360,35 @@
>> // Stack Frame Processing methods
>> //===----------------------------------------------------------------------===//
>>
>> +bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
>> + const MachineFrameInfo *MFI = MF.getFrameInfo();
>> +
>> + if (!EnableBasePointer)
>> + return false;
>> +
>> + // When we need stack realignment and there are dynamic allocas, we can't
>> + // reference off of the stack pointer, so we reserve a base pointer.
>> + if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
>> const MachineFrameInfo *MFI = MF.getFrameInfo();
>> - return (MF.getTarget().Options.RealignStack &&
>> - !MFI->hasVarSizedObjects());
>> + const MachineRegisterInfo *MRI = &MF.getRegInfo();
>> + if (!MF.getTarget().Options.RealignStack)
>> + return false;
>> +
>> + // Stack realignment requires a frame pointer. If we already started
>> + // register allocation with frame pointer elimination, it is too late now.
>> + if (!MRI->canReserveReg(FramePtr))
>> + return false;
>> +
>> + // If a base pointer is necessary, check that it isn't too late to reserve it.
>> + if (MFI->hasVarSizedObjects())
>> + return MRI->canReserveReg(BasePtr);
>> + return true;
>> }
>>
>> bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
>> @@ -353,13 +398,6 @@
>> bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
>> F->hasFnAttr(Attribute::StackAlignment));
>>
>> - // FIXME: Currently we don't support stack realignment for functions with
>> - // variable-sized allocas.
>> - // FIXME: It's more complicated than this...
>> - if (0 && requiresRealignment && MFI->hasVarSizedObjects())
>> - report_fatal_error(
>> - "Stack realignment in presence of dynamic allocas is not supported");
>> -
>> // If we've requested that we force align the stack do so now.
>> if (ForceStackAlign)
>> return canRealignStack(MF);
>> @@ -499,7 +537,9 @@
>>
>> unsigned Opc = MI.getOpcode();
>> bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
>> - if (needsStackRealignment(MF))
>> + if (hasBasePointer(MF))
>> + BasePtr = getBaseRegister();
>> + else if (needsStackRealignment(MF))
>> BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
>> else if (AfterFPPop)
>> BasePtr = StackPtr;
>>
>> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=158087&r1=158086&r2=158087&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Jun 6 12:37:40 2012
>> @@ -50,6 +50,11 @@
>> ///
>> unsigned FramePtr;
>>
>> + /// BasePtr - X86 physical register used as a base ptr in complex stack
>> + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
>> + /// variable size stack objects.
>> + unsigned BasePtr;
>> +
>> public:
>> X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
>>
>> @@ -106,6 +111,8 @@
>> /// register scavenger to determine what registers are free.
>> BitVector getReservedRegs(const MachineFunction &MF) const;
>>
>> + bool hasBasePointer(const MachineFunction &MF) const;
>> +
>> bool canRealignStack(const MachineFunction &MF) const;
>>
>> bool needsStackRealignment(const MachineFunction &MF) const;
>> @@ -123,6 +130,7 @@
>> // Debug information queries.
>> unsigned getFrameRegister(const MachineFunction &MF) const;
>> unsigned getStackRegister() const { return StackPtr; }
>> + unsigned getBaseRegister() const { return BasePtr; }
>> // FIXME: Move to FrameInfo
>> unsigned getSlotSize() const { return SlotSize; }
>>
>>
>> Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll?rev=158087&r1=158086&r2=158087&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/alloca-align-rounding-32.ll Wed Jun 6 12:37:40 2012
>> @@ -1,4 +1,4 @@
>> -; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
>> +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
>>
>> declare void @bar(<2 x i64>* %n)
>>
>> @@ -6,10 +6,15 @@
>> %p = alloca <2 x i64>, i32 %h
>> call void @bar(<2 x i64>* %p)
>> ret void
>> +; CHECK: foo
>> +; CHECK-NOT: andl $-32, %eax
>> }
>>
>> define void @foo2(i32 %h) {
>> %p = alloca <2 x i64>, i32 %h, align 32
>> call void @bar(<2 x i64>* %p)
>> ret void
>> +; CHECK: foo2
>> +; CHECK: andl $-32, %esp
>> +; CHECK: andl $-32, %eax
>> }
>>
>> Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll?rev=158087&r1=158086&r2=158087&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll Wed Jun 6 12:37:40 2012
>> @@ -1,4 +1,4 @@
>> -; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
>> +; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
>>
>> declare void @bar(<2 x i64>* %n)
>>
>> @@ -6,10 +6,15 @@
>> %p = alloca <2 x i64>, i64 %h
>> call void @bar(<2 x i64>* %p)
>> ret void
>> +; CHECK: foo
>> +; CHECK-NOT: andq $-32, %rax
>> }
>>
>> define void @foo2(i64 %h) {
>> %p = alloca <2 x i64>, i64 %h, align 32
>> call void @bar(<2 x i64>* %p)
>> ret void
>> +; CHECK: foo2
>> +; CHECK: andq $-32, %rsp
>> +; CHECK: andq $-32, %rax
>> }
>>
>> Added: llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll?rev=158087&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/dynamic-allocas-VLAs.ll Wed Jun 6 12:37:40 2012
>> @@ -0,0 +1,158 @@
>> +; RUN: llc < %s -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
>> +; rdar://11496434
>> +
>> +; no VLAs or dynamic alignment
>> +define i32 @t1() nounwind uwtable ssp {
>> +entry:
>> + %a = alloca i32, align 4
>> + call void @t1_helper(i32* %a) nounwind
>> + %0 = load i32* %a, align 4
>> + %add = add nsw i32 %0, 13
>> + ret i32 %add
>> +
>> +; CHECK: _t1
>> +; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
>> +; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
>> +; CHECK: callq _t1_helper
>> +; CHECK: movl [[OFFSET]](%rsp), %eax
>> +; CHECK: addl $13, %eax
>> +}
>> +
>> +declare void @t1_helper(i32*)
>> +
>> +; dynamic realignment
>> +define i32 @t2() nounwind uwtable ssp {
>> +entry:
>> + %a = alloca i32, align 4
>> + %v = alloca <8 x float>, align 32
>> + call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
>> + %0 = load i32* %a, align 4
>> + %add = add nsw i32 %0, 13
>> + ret i32 %add
>> +
>> +; CHECK: _t2
>> +; CHECK: pushq %rbp
>> +; CHECK: movq %rsp, %rbp
>> +; CHECK: andq $-32, %rsp
>> +; CHECK: subq ${{[0-9]+}}, %rsp
>> +;
>> +; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
>> +; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
>> +; CHECK: callq _t2_helper
>> +;
>> +; CHECK: movq %rbp, %rsp
>> +; CHECK: popq %rbp
>> +}
>> +
>> +declare void @t2_helper(i32*, <8 x float>*)
>> +
>> +; VLAs
>> +define i32 @t3(i64 %sz) nounwind uwtable ssp {
>> +entry:
>> + %a = alloca i32, align 4
>> + %vla = alloca i32, i64 %sz, align 16
>> + call void @t3_helper(i32* %a, i32* %vla) nounwind
>> + %0 = load i32* %a, align 4
>> + %add = add nsw i32 %0, 13
>> + ret i32 %add
>> +
>> +; CHECK: _t3
>> +; CHECK: pushq %rbp
>> +; CHECK: movq %rsp, %rbp
>> +; CHECK: pushq %rbx
>> +; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
>> +; CHECK: subq ${{[0-9]+}}, %rsp
>> +;
>> +; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
>> +; CHECK: popq %rbx
>> +; CHECK: popq %rbp
>> +}
>> +
>> +declare void @t3_helper(i32*, i32*)
>> +
>> +; VLAs + Dynamic realignment
>> +define i32 @t4(i64 %sz) nounwind uwtable ssp {
>> +entry:
>> + %a = alloca i32, align 4
>> + %v = alloca <8 x float>, align 32
>> + %vla = alloca i32, i64 %sz, align 16
>> + call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
>> + %0 = load i32* %a, align 4
>> + %add = add nsw i32 %0, 13
>> + ret i32 %add
>> +
>> +; CHECK: _t4
>> +; CHECK: pushq %rbp
>> +; CHECK: movq %rsp, %rbp
>> +; CHECK: andq $-32, %rsp
>> +; CHECK: pushq %r14
>> +; CHECK: pushq %rbx
>> +; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
>> +; CHECK: movq %rsp, %rbx
>> +;
>> +; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
>> +; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
>> +; CHECK: callq _t4_helper
>> +;
>> +; CHECK: addq $[[STACKADJ]], %rsp
>> +; CHECK: popq %rbx
>> +; CHECK: popq %r14
>> +; CHECK: movq %rbp, %rsp
>> +; CHECK: popq %rbp
>> +}
>> +
>> +declare void @t4_helper(i32*, i32*, <8 x float>*)
>> +
>> +; Dynamic realignment + Spill
>> +define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
>> +entry:
>> + %a = alloca i32, align 4
>> + %0 = bitcast float* %f to <8 x float>*
>> + %1 = load <8 x float>* %0, align 32
>> + call void @t5_helper1(i32* %a) nounwind
>> + call void @t5_helper2(<8 x float> %1) nounwind
>> + %2 = load i32* %a, align 4
>> + %add = add nsw i32 %2, 13
>> + ret i32 %add
>> +
>> +; CHECK: _t5
>> +; CHECK: pushq %rbp
>> +; CHECK: movq %rsp, %rbp
>> +; CHECK: andq $-32, %rsp
>> +; CHECK: subq ${{[0-9]+}}, %rsp
>> +;
>> +; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
>> +; CHECK: vmovaps [[AVXREG]], (%rsp)
>> +; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
>> +; CHECK: callq _t5_helper1
>> +; CHECK: vmovaps (%rsp), %ymm0
>> +; CHECK: callq _t5_helper2
>> +; CHECK: movl {{[0-9]+}}(%rsp), %eax
>> +;
>> +; CHECK: movq %rbp, %rsp
>> +; CHECK: popq %rbp
>> +}
>> +
>> +declare void @t5_helper1(i32*)
>> +
>> +declare void @t5_helper2(<8 x float>)
>> +
>> +; VLAs + Dynamic realignment + Spill
>> +; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
>> +define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
>> +entry:
>> +; CHECK: _t6
>> + %a = alloca i32, align 4
>> + %0 = bitcast float* %f to <8 x float>*
>> + %1 = load <8 x float>* %0, align 32
>> + %vla = alloca i32, i64 %sz, align 16
>> + call void @t6_helper1(i32* %a, i32* %vla) nounwind
>> + call void @t6_helper2(<8 x float> %1) nounwind
>> + %2 = load i32* %a, align 4
>> + %add = add nsw i32 %2, 13
>> + ret i32 %add
>> +}
>> +
>> +declare void @t6_helper1(i32*, i32*)
>> +
>> +declare void @t6_helper2(<8 x float>)
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list