[llvm] r175457 - Support for HiPE-compatible code emission, patch by Yiannis Tsiouris.
Benjamin Kramer
benny.kra at googlemail.com
Mon Feb 18 12:55:12 PST 2013
Author: d0k
Date: Mon Feb 18 14:55:12 2013
New Revision: 175457
URL: http://llvm.org/viewvc/llvm-project?rev=175457&view=rev
Log:
Support for HiPE-compatible code emission, patch by Yiannis Tsiouris.
Added:
llvm/trunk/test/CodeGen/X86/hipe-prologue.ll
Modified:
llvm/trunk/include/llvm/Target/TargetFrameLowering.h
llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
llvm/trunk/lib/Target/X86/X86FrameLowering.h
Modified: llvm/trunk/include/llvm/Target/TargetFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetFrameLowering.h?rev=175457&r1=175456&r2=175457&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetFrameLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetFrameLowering.h Mon Feb 18 14:55:12 2013
@@ -120,6 +120,10 @@ public:
/// by adding a check even before the "normal" function prologue.
virtual void adjustForSegmentedStacks(MachineFunction &MF) const { }
+ /// Adjust the prologue to add Erlang Run-Time System (ERTS) specific code in
+ /// the assembly prologue to explicitly handle the stack. The default
+ /// implementation does nothing; a target that supports the HiPE calling
+ /// convention overrides this hook. It is invoked from prologue/epilogue
+ /// insertion only for functions that use the HiPE calling convention.
+ virtual void adjustForHiPEPrologue(MachineFunction &MF) const { }
+
/// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
/// saved registers and returns true if it isn't possible / profitable to do
/// so by issuing a series of store instructions via
Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=175457&r1=175456&r2=175457&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original)
+++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Mon Feb 18 14:55:12 2013
@@ -693,6 +693,14 @@ void PEI::insertPrologEpilogCode(Machine
// space in small chunks instead of one large contiguous block.
if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ TFI.adjustForHiPEPrologue(Fn);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=175457&r1=175456&r2=175457&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Feb 18 14:55:12 2013
@@ -1387,16 +1387,25 @@ HasNestArgument(const MachineFunction *M
}
-/// GetScratchRegister - Get a register for performing work in the segmented
-/// stack prologue. Depending on platform and the properties of the function
-/// either one or two registers will be needed. Set primary to true for
-/// the first register, false for the second.
+/// GetScratchRegister - Get a temp register for performing work in the
+/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
+/// and the properties of the function either one or two registers will be
+/// needed. Set primary to true for the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+
+ // HiPE (Erlang) code uses its own dedicated scratch registers.
+ if (CallingConvention == CallingConv::HiPE) {
+ if (Is64Bit)
+ return Primary ? X86::R14 : X86::R13;
+ else
+ return Primary ? X86::EBX : X86::EDI;
+ }
+
if (Is64Bit)
return Primary ? X86::R11 : X86::R12;
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
if (CallingConvention == CallingConv::X86_FastCall ||
@@ -1602,4 +1611,145 @@ X86FrameLowering::adjustForSegmentedStac
#ifdef XDEBUG
MF.verify();
#endif
+}
+
+// Erlang programs may need a special prologue to handle the stack size they
+// might need at runtime. That is because Erlang/OTP does not implement a C
+// stack but uses a custom implementation of hybrid stack/heap
+// architecture. (for more information see Erik Stenman's Ph.D. thesis:
+// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
+//
+// The code emitted in front of the original prologue (OldStart) is:
+//
+// CheckStack:
+//   temp0 = sp - MaxStack
+//   if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+// OldStart:
+//   ...
+// IncStack:
+//   call inc_stack_0   # doubles the stack space
+//   temp0 = sp - MaxStack
+//   if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+//
+// Nothing is emitted when the computed MaxStack does not exceed the stack
+// area that is always guaranteed to be available (HipeLeafWords slots).
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+  const X86InstrInfo &TII = *TM.getInstrInfo();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize();
+  const bool Is64Bit = STI.is64Bit();
+  DebugLoc DL;
+
+  assert(STI.isTargetLinux() &&
+         "HiPE prologue is only supported on Linux operating systems.");
+
+  // HiPE-specific values
+  const unsigned HipeLeafWords = 24;
+  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+  const unsigned Guaranteed = HipeLeafWords * SlotSize;
+
+  // Number of this function's arguments that are passed on the stack. Compare
+  // before subtracting: the operands are unsigned, so a bare difference would
+  // wrap around whenever every argument fits in a register.
+  const unsigned NumArgs = MF.getFunction()->arg_size();
+  const unsigned CallerStkArity =
+    NumArgs > CCRegisteredArgs ? NumArgs - CCRegisteredArgs : 0;
+  unsigned MaxStack =
+    MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize;
+
+  // Compute the largest caller's frame that is needed to fit the callees'
+  // frames. This 'MaxStack' is computed from:
+  //
+  // a) the fixed frame size, which is the space needed for all spilled temps,
+  // b) outgoing on-stack parameter areas, and
+  // c) the minimum stack space this function needs to make available for the
+  //    functions it calls (a tunable ABI property).
+  if (MFI->hasCalls()) {
+    unsigned MoreStackForCalls = 0;
+
+    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+         MBBI != MBBE; ++MBBI)
+      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+           MI != ME; ++MI) {
+        if (!MI->isCall())
+          continue;
+
+        // Get callee operand.
+        const MachineOperand &MO = MI->getOperand(0);
+
+        // Only take account of global function calls (no closures etc.).
+        if (!MO.isGlobal())
+          continue;
+        const Function *F = dyn_cast<Function>(MO.getGlobal());
+        if (!F)
+          continue;
+
+        // Do not update 'MaxStack' for primitive and built-in functions
+        // (encoded with names either containing "erlang."/"bif_" or having
+        // neither a ".", such as a simple <Module>.<Function>.<Arity>, nor an
+        // "_", such as the BIF "suspend_0") as they are executed on another
+        // stack.
+        const StringRef Name = F->getName();
+        if (Name.find("erlang.") != StringRef::npos ||
+            Name.find("bif_") != StringRef::npos ||
+            Name.find_first_of("._") == StringRef::npos)
+          continue;
+
+        // Guard both subtractions: in unsigned arithmetic they would wrap
+        // around for a callee whose arguments all fit in registers, or for
+        // one that takes at least HipeLeafWords - 1 stack arguments. On hosts
+        // with a 32-bit size_t the wrapped value used to be widened into a
+        // huge positive arity, which made this call contribute nothing.
+        const unsigned CalleeArgs = F->arg_size();
+        const unsigned CalleeStkArity =
+          CalleeArgs > CCRegisteredArgs ? CalleeArgs - CCRegisteredArgs : 0;
+        if (HipeLeafWords - 1 > CalleeStkArity)
+          MoreStackForCalls = std::max<unsigned>(
+            MoreStackForCalls,
+            (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+      }
+    MaxStack += MoreStackForCalls;
+  }
+
+  // If the stack frame needed is larger than the guaranteed then runtime checks
+  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
+  if (MaxStack > Guaranteed) {
+    MachineBasicBlock &prologueMBB = MF.front();
+    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
+    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
+
+    // Whatever is live into the original prologue must stay live across the
+    // two new blocks that now precede it.
+    for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
+           E = prologueMBB.livein_end(); I != E; ++I) {
+      stackCheckMBB->addLiveIn(*I);
+      incStackMBB->addLiveIn(*I);
+    }
+
+    // Resulting block order: stackCheckMBB, incStackMBB, original prologue.
+    MF.push_front(incStackMBB);
+    MF.push_front(stackCheckMBB);
+
+    unsigned SPReg, PReg, SPLimitOffset;
+    unsigned LEAop, CMPop, CALLop;
+    if (Is64Bit) {
+      SPReg = X86::RSP;
+      PReg = X86::RBP;
+      LEAop = X86::LEA64r;
+      CMPop = X86::CMP64rm;
+      CALLop = X86::CALL64pcrel32;
+      SPLimitOffset = 0x90;
+    } else {
+      SPReg = X86::ESP;
+      PReg = X86::EBP;
+      LEAop = X86::LEA32r;
+      CMPop = X86::CMP32rm;
+      CALLop = X86::CALLpcrel32;
+      SPLimitOffset = 0x4c;
+    }
+
+    const unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+           "HiPE prologue scratch register is live-in");
+
+    // Create new MBB for StackCheck:
+    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
+                 SPReg, false, -MaxStack);
+    // SPLimitOffset is in a fixed heap location (pointed by BP).
+    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
+                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
+    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
+
+    // Create new MBB for IncStack:
+    BuildMI(incStackMBB, DL, TII.get(CALLop)).
+      addExternalSymbol("inc_stack_0");
+    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
+                 SPReg, false, -MaxStack);
+    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
+                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
+    // NOTE(review): the first check branches on an unsigned condition (JAE)
+    // while this retry uses a signed one (JLE), which also loops when the two
+    // values are equal -- confirm that this asymmetry is intended.
+    BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
+
+    stackCheckMBB->addSuccessor(&prologueMBB, 99);
+    stackCheckMBB->addSuccessor(incStackMBB, 1);
+    incStackMBB->addSuccessor(&prologueMBB, 99);
+    incStackMBB->addSuccessor(incStackMBB, 1);
+  }
+#ifdef XDEBUG
+  MF.verify();
+#endif
}
Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.h?rev=175457&r1=175456&r2=175457&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.h Mon Feb 18 14:55:12 2013
@@ -43,6 +43,8 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF) const;
+ /// Emit the explicit stack-check prologue required by HiPE (Erlang/OTP
+ /// native code) for \p MF; see the definition for the emitted sequence.
+ void adjustForHiPEPrologue(MachineFunction &MF) const;
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
Added: llvm/trunk/test/CodeGen/X86/hipe-prologue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/hipe-prologue.ll?rev=175457&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/hipe-prologue.ll (added)
+++ llvm/trunk/test/CodeGen/X86/hipe-prologue.ll Mon Feb 18 14:55:12 2013
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+
+; The HiPE compiler (i.e., the native code compiler of the Erlang/OTP system)
+; adds a custom assembly prologue in order to efficiently manipulate the stack
+; at runtime.
+
+; Just to prevent the alloca from being optimized away.
+declare void @dummy_use(i32*, i32)
+
+; Default calling convention: the HiPE stack check must not be emitted, so no
+; call to inc_stack_0 may appear in the output for either target.
+define {i32, i32} @test_basic(i32 %hp, i32 %p) {
+ ; X32-Linux: test_basic:
+ ; X32-Linux-NOT: calll inc_stack_0
+
+ ; X64-Linux: test_basic:
+ ; X64-Linux-NOT: callq inc_stack_0
+
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
+ %1 = insertvalue {i32, i32} undef, i32 %hp, 0
+ %2 = insertvalue {i32, i32} %1, i32 %p, 1
+ ; NOTE(review): %1 is returned and %2 is left unused, unlike
+ ; test_basic_hipecc which returns %2 -- confirm this is intended.
+ ret {i32, i32} %1
+}
+
+; HiPE calling convention (cc 11) with an alloca and a call: the prologue must
+; load sp - MaxStack into the scratch register, compare it against the limit
+; stored at offset 76 (0x4c, 32-bit) or 144 (0x90, 64-bit) from the base
+; pointer, and branch to a block that calls inc_stack_0.
+define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {
+ ; X32-Linux: test_basic_hipecc:
+ ; X32-Linux: leal -156(%esp), %ebx
+ ; X32-Linux-NEXT: cmpl 76(%ebp), %ebx
+ ; X32-Linux-NEXT: jb .LBB1_1
+
+ ; X32-Linux: ret
+
+ ; X32-Linux: .LBB1_1:
+ ; X32-Linux-NEXT: calll inc_stack_0
+
+ ; X64-Linux: test_basic_hipecc:
+ ; X64-Linux: leaq -232(%rsp), %r14
+ ; X64-Linux-NEXT: cmpq 144(%rbp), %r14
+ ; X64-Linux-NEXT: jb .LBB1_1
+
+ ; X64-Linux: ret
+
+ ; X64-Linux: .LBB1_1:
+ ; X64-Linux-NEXT: callq inc_stack_0
+
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
+ %1 = insertvalue {i32, i32} undef, i32 %hp, 0
+ %2 = insertvalue {i32, i32} %1, i32 %p, 1
+ ret {i32, i32} %2
+}
+
+; HiPE calling convention (cc 11) but no alloca and no call: no call to
+; inc_stack_0 is expected -- presumably because the frame fits in the stack
+; area that is always guaranteed; TODO confirm.
+define cc 11 {i32,i32,i32} @test_nocall_hipecc(i32 %hp,i32 %p,i32 %x,i32 %y) {
+ ; X32-Linux: test_nocall_hipecc:
+ ; X32-Linux-NOT: calll inc_stack_0
+
+ ; X64-Linux: test_nocall_hipecc:
+ ; X64-Linux-NOT: callq inc_stack_0
+
+ %1 = add i32 %x, %y
+ %2 = mul i32 42, %1
+ %3 = sub i32 24, %2
+ %4 = insertvalue {i32, i32, i32} undef, i32 %hp, 0
+ %5 = insertvalue {i32, i32, i32} %4, i32 %p, 1
+ %6 = insertvalue {i32, i32, i32} %5, i32 %p, 2
+ ret {i32, i32, i32} %6
+}
More information about the llvm-commits
mailing list