[llvm] r210489 - ARM: add VLA extension for WoA Itanium ABI
Saleem Abdulrasool
compnerd at compnerd.org
Mon Jun 9 13:18:43 PDT 2014
Author: compnerd
Date: Mon Jun 9 15:18:42 2014
New Revision: 210489
URL: http://llvm.org/viewvc/llvm-project?rev=210489&view=rev
Log:
ARM: add VLA extension for WoA Itanium ABI
The armv7-windows-itanium environment is nearly identical to the MSVC ABI. It
has a few divergences, mostly revolving around the use of the Itanium ABI for
C++. VLA support is one of the extensions that this environment adds on top of
the MSVC ABI.
This adds support for proper VLA emission for this environment. This is
somewhat similar to the handling for __chkstk emission on X86 and the large
stack frame emission for ARM. The invocation style for chkstk is still
controlled via the -mcmodel flag to clang.
Make an explicit note that this is an extension.
Added:
llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
Modified:
llvm/trunk/docs/Extensions.rst
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
Modified: llvm/trunk/docs/Extensions.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/Extensions.rst?rev=210489&r1=210488&r2=210489&view=diff
==============================================================================
--- llvm/trunk/docs/Extensions.rst (original)
+++ llvm/trunk/docs/Extensions.rst Mon Jun 9 15:18:42 2014
@@ -195,3 +195,17 @@ range via a slight deviation. It will g
blx r12
sub.w sp, sp, r4
+Variable Length Arrays
+^^^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2012) does not permit the
+emission of Variable Length Arrays (VLAs).
+
+The Windows ARM Itanium ABI extends the base ABI by adding support for emitting
+a dynamic stack allocation. When emitting a variable stack allocation, a call
+to ``__chkstk`` is emitted unconditionally to ensure that guard pages are set up
+properly. The emission of this stack probe is handled similarly to the
+standard stack probe emission.
+
+The MSVC environment does not emit code for VLAs currently.
+
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=210489&r1=210488&r2=210489&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Jun 9 15:18:42 2014
@@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(Tar
setExceptionSelectorRegister(ARM::R1);
}
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
@@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTarget
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+ case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
@@ -6214,6 +6220,10 @@ SDValue ARMTargetLowering::LowerOperatio
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ llvm_unreachable("Don't know how to custom lower this!");
}
}
@@ -7113,6 +7123,73 @@ ARMTargetLowering::EmitStructByval(Machi
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetMachine &TM = getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(Subtarget->isTargetWindows() &&
+ "__chkstk is only supported on Windows");
+ assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+ // __chkstk takes the number of words to allocate on the stack in R4, and
+ // returns the stack adjustment in number of bytes in R4. This will not
+ // clobber any other registers (other than the obvious lr).
+ //
+ // Although, technically, IP should be considered a register which may be
+ // clobbered, the call itself will not touch it. Windows on ARM is a pure
+ // thumb-2 environment, so there is no interworking required. As a result, we
+ // do not expect a veneer to be emitted by the linker, clobbering IP.
+ //
+ // Each module receives its own copy of __chkstk, so no import thunk is
+ // required, again, ensuring that IP is not clobbered.
+ //
+ // Finally, although some linkers may theoretically provide a trampoline for
+ // out of range calls (which is quite common due to a 32M range limitation of
+ // branches for Thumb), we can generate the long-call version via
+ // -mcmodel=large, alleviating the need for the trampoline which may clobber
+ // IP.
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault: {
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+ .addExternalSymbol("__chkstk");
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ }
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -7361,6 +7438,8 @@ ARMTargetLowering::EmitInstrWithCustomIn
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
+ case ARM::WIN__CHKSTK:
+ return EmitLowered__chkstk(MI, BB);
}
}
@@ -10481,6 +10560,32 @@ SDValue ARMTargetLowering::LowerDivRem(S
return CallInfo.first;
}
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "unsupported target platform");
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Flag;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+
+ SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+ Chain = NewSP.getValue(1);
+
+ SDValue Ops[2] = { NewSP, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=210489&r1=210488&r2=210489&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Jun 9 15:18:42 2014
@@ -95,6 +95,8 @@ namespace llvm {
PRELOAD, // Preload
+ WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
@@ -470,6 +472,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -578,6 +581,9 @@ namespace llvm {
MachineBasicBlock *EmitStructByval(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=210489&r1=210488&r2=210489&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Jun 9 15:18:42 2014
@@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_
let Inst{11-0} = a;
}
+// Dynamic stack allocation yields a __chkstk for Windows targets. These calls
+// are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), like the stack pointer change.
+
+def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+ def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
Added: llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/Windows/vla.ll?rev=210489&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/Windows/vla.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/Windows/vla.ll Mon Jun 9 15:18:42 2014
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-SMALL-CODE
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -code-model=large -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-LARGE-CODE
+; RUN: llc -mtriple=thumbv7-windows-msvc -mcpu=cortex-a9 -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-MSVC
+
+define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) {
+entry:
+ %vla = alloca i8, i32 %sz, align 1
+ %arrayidx = getelementptr inbounds i8* %vla, i32 %idx
+ %0 = load volatile i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
+; CHECK-SMALL-CODE: bl __chkstk
+; CHECK-SMALL-CODE: sub.w sp, sp, r4
+
+; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
+; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
+; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
+; CHECK-LARGE-CODE: blx [[IP]]
+; CHECK-LARGE-CODE: sub.w sp, sp, r4
+
+; CHECK-MSVC-NOT: __chkstk
+
More information about the llvm-commits
mailing list