[llvm] dedaf3a - [VE] Dynamic stack allocation
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Wed May 27 01:16:37 PDT 2020
Author: Kazushi (Jam) Marukawa
Date: 2020-05-27T10:11:06+02:00
New Revision: dedaf3a2ac59548c70a0d54da7267bbb082782c0
URL: https://github.com/llvm/llvm-project/commit/dedaf3a2ac59548c70a0d54da7267bbb082782c0
DIFF: https://github.com/llvm/llvm-project/commit/dedaf3a2ac59548c70a0d54da7267bbb082782c0.diff
LOG: [VE] Dynamic stack allocation
Summary:
This patch implements dynamic stack allocation for the VE target. Changes:
* compiler-rt: `__ve_grow_stack` to request stack allocation on the VE.
* VE: base pointer support, dynamic stack allocation.
Differential Revision: https://reviews.llvm.org/D79084
Added:
compiler-rt/lib/builtins/ve/grow_stack.S
compiler-rt/lib/builtins/ve/grow_stack_align.S
llvm/test/CodeGen/VE/alloca.ll
llvm/test/CodeGen/VE/alloca_aligned.ll
Modified:
compiler-rt/cmake/Modules/CompilerRTUtils.cmake
compiler-rt/cmake/base-config-ix.cmake
compiler-rt/cmake/builtin-config-ix.cmake
compiler-rt/lib/builtins/CMakeLists.txt
llvm/lib/Target/VE/VECallingConv.td
llvm/lib/Target/VE/VEFrameLowering.cpp
llvm/lib/Target/VE/VEFrameLowering.h
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.cpp
llvm/lib/Target/VE/VEInstrInfo.h
llvm/lib/Target/VE/VEInstrInfo.td
llvm/lib/Target/VE/VERegisterInfo.cpp
llvm/lib/Target/VE/VESubtarget.h
Removed:
################################################################################
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index a83e916990d7..0a686e38ff88 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -166,6 +166,7 @@ macro(detect_target_arch)
check_symbol_exists(__sparcv9 "" __SPARCV9)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
+ check_symbol_exists(__ve__ "" __VE)
if(__ARM)
add_default_target_arch(arm)
elseif(__AARCH64)
@@ -200,6 +201,8 @@ macro(detect_target_arch)
add_default_target_arch(wasm32)
elseif(__WEBASSEMBLY64)
add_default_target_arch(wasm64)
+ elseif(__VE)
+ add_default_target_arch(ve)
endif()
endmacro()
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 234cd7262b72..964dd598f102 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -237,6 +237,8 @@ macro(test_targets)
test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
+ elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve")
+ test_target_arch(ve "__ve__" "--target=ve-unknown-none")
endif()
set(COMPILER_RT_OS_SUFFIX "")
endif()
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 1bd7ad46df44..5f4275ae54d4 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -37,6 +37,7 @@ set(SPARC sparc)
set(SPARCV9 sparcv9)
set(WASM32 wasm32)
set(WASM64 wasm64)
+set(VE ve)
if(APPLE)
set(ARM64 arm64 arm64e)
@@ -44,8 +45,11 @@ if(APPLE)
set(X86_64 x86_64 x86_64h)
endif()
-set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
- ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} ${WASM32} ${WASM64})
+set(ALL_BUILTIN_SUPPORTED_ARCH
+ ${X86} ${X86_64} ${ARM32} ${ARM64}
+ ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64}
+ ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
+ ${WASM32} ${WASM64} ${VE})
include(CompilerRTUtils)
include(CompilerRTDarwinUtils)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index f63f06c3bfa2..f8431bdcf059 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -573,6 +573,12 @@ set(wasm64_SOURCES
${GENERIC_SOURCES}
)
+set(ve_SOURCES
+ ve/grow_stack.S
+ ve/grow_stack_align.S
+ ${GENERIC_TF_SOURCES}
+ ${GENERIC_SOURCES})
+
add_custom_target(builtins)
set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/compiler-rt/lib/builtins/ve/grow_stack.S b/compiler-rt/lib/builtins/ve/grow_stack.S
new file mode 100644
index 000000000000..f403798495af
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroy %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack)
+ subu.l %sp, %sp, %s0 # sp -= alloca size
+ and %sp, -16, %sp # align sp
+ brge.l.t %sp, %sl, 1f
+ ld %s63, 0x18(,%tp) # load param area
+ lea %s62, 0x13b # syscall # of grow
+ shm.l %s62, 0x0(%s63) # stored at addr:0
+ shm.l %sl, 0x8(%s63) # old limit at addr:8
+ shm.l %sp, 0x10(%s63) # new limit at addr:16
+ monc
+1:
+ b.l (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack)
+
+#endif // __ve__
diff --git a/compiler-rt/lib/builtins/ve/grow_stack_align.S b/compiler-rt/lib/builtins/ve/grow_stack_align.S
new file mode 100644
index 000000000000..19a1dfa8726c
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack_align.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroy %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align 4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+ subu.l %sp, %sp, %s0 # sp -= alloca size
+ and %sp, %sp, %s1 # align sp
+ brge.l.t %sp, %sl, 1f
+ ld %s63, 0x18(,%tp) # load param area
+ lea %s62, 0x13b # syscall # of grow
+ shm.l %s62, 0x0(%s63) # stored at addr:0
+ shm.l %sl, 0x8(%s63) # old limit at addr:8
+ shm.l %sp, 0x10(%s63) # new limit at addr:16
+ monc
+1:
+ b.l (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+
+#endif // __ve__
diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
index 5c32962658bb..4f04dae884ab 100644
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -84,3 +84,6 @@ def RetCC_VE : CallingConv<[
// Callee-saved registers
def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+// PreserveAll (clobbers s62,s63) - used for ve_grow_stack
+def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;
diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
index e6cd56285198..8b10e6466123 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -30,12 +30,13 @@ using namespace llvm;
VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
- Align(16)) {}
+ Align(16)),
+ STI(ST) {}
void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- int NumBytes,
+ uint64_t NumBytes,
bool RequireFPUpdate) const {
DebugLoc dl;
@@ -47,6 +48,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
// st %lr, 8(,%sp)
// st %got, 24(,%sp)
// st %plt, 32(,%sp)
+ // st %s17, 40(,%sp) iff this function is using s17 as BP
// or %fp, 0, %sp
BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
@@ -69,6 +71,12 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
.addImm(0)
.addImm(32)
.addReg(VE::SX16);
+ if (hasBP(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(40)
+ .addReg(VE::SX17);
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
.addReg(VE::SX11)
.addImm(0);
@@ -77,7 +85,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- int NumBytes,
+ uint64_t NumBytes,
bool RequireFPUpdate) const {
DebugLoc dl;
@@ -86,6 +94,7 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
// Insert following codes here as epilogue
//
// or %sp, 0, %fp
+ // ld %s17, 40(,%sp) iff this function is using s17 as BP
// ld %got, 32(,%sp)
// ld %plt, 24(,%sp)
// ld %lr, 8(,%sp)
@@ -94,6 +103,11 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
.addReg(VE::SX9)
.addImm(0);
+ if (hasBP(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(40);
BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
.addReg(VE::SX11)
.addImm(0)
@@ -115,7 +129,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- int NumBytes) const {
+ int64_t NumBytes,
+ MaybeAlign MaybeAlign) const {
DebugLoc dl;
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -143,11 +158,17 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
.addReg(VE::SX11)
.addReg(VE::SX13)
.addImm(Hi_32(NumBytes));
+
+ if (MaybeAlign) {
+ // and %sp, %sp, Align-1
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+ .addReg(VE::SX11)
+ .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
+ }
}
void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- int NumBytes) const {
+ MachineBasicBlock::iterator MBBI) const {
DebugLoc dl;
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -186,11 +207,8 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo &MFI = MF.getFrameInfo();
- const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
- const VERegisterInfo &RegInfo =
- *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
+ const VEInstrInfo &TII = *STI.getInstrInfo();
+ const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -209,30 +227,15 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
"(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
- int NumBytes = (int)MFI.getStackSize();
- // The VE ABI requires a reserved 176-byte area in the user's stack, starting
- // at %sp + 16. This is for the callee Register Save Area (RSA).
- //
- // We therefore need to add that offset to the total stack size
- // after all the stack objects are placed by
- // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
- // needs to be aligned *after* the extra size is added, we need to disable
- // calculateFrameObjectOffsets's built-in stack alignment, by having
- // targetHandlesStackFrameRounding return true.
-
- // Add the extra call frame stack size, if needed. (This is the same
- // code as in PrologEpilogInserter, but also gets disabled by
- // targetHandlesStackFrameRounding)
- if (MFI.adjustsStack() && hasReservedCallFrame(MF))
- NumBytes += MFI.getMaxCallFrameSize();
-
- // Adds the VE subtarget-specific spill area to the stack
- // size. Also ensures target-required alignment.
- NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
+ uint64_t NumBytes = MFI.getStackSize();
+
+  // The VE ABI requires a reserved 176-byte area at the top
+  // of the stack as described in VESubtarget.cpp. So, we adjust it here.
+ NumBytes = STI.getAdjustedFrameSize(NumBytes);
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
- NumBytes = alignTo(NumBytes, MFI.getMaxAlign().value());
+ NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
// Update stack size with corrected value.
MFI.setStackSize(NumBytes);
@@ -241,16 +244,25 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
// Emit stack adjust instructions
- emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
+ MaybeAlign RuntimeAlign =
+ NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+ emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
+
+ if (hasBP(MF)) {
+ // Copy SP to BP.
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+ .addReg(VE::SX11)
+ .addImm(0);
+ }
// Emit stack extend instructions
- emitSPExtend(MF, MBB, MBBI, -NumBytes);
+ emitSPExtend(MF, MBB, MBBI);
- unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
+ Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -265,7 +277,7 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
if (!hasReservedCallFrame(MF)) {
MachineInstr &MI = *I;
- int Size = MI.getOperand(0).getImm();
+ int64_t Size = MI.getOperand(0).getImm();
if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
Size = -Size;
@@ -281,20 +293,17 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo &MFI = MF.getFrameInfo();
- int NumBytes = (int)MFI.getStackSize();
+ uint64_t NumBytes = MFI.getStackSize();
// Emit Epilogue instructions to restore %lr
emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
}
-bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- // Reserve call frame if there are no variable sized objects on the stack.
- return !MF.getFrameInfo().hasVarSizedObjects();
-}
-
// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
+// pointer register. This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled. For VE, we don't implement
+// an FP eliminator yet, but we return false from this function so that
+// the generated code does not refer to fp.
bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -304,44 +313,41 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken();
}
+bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+}
+
int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const {
- const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const VERegisterInfo *RegInfo = STI.getRegisterInfo();
const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
bool isFixed = MFI.isFixedObjectIndex(FI);
- // Addressable stack objects are accessed using neg. offsets from
- // %fp, or positive offsets from %sp.
- bool UseFP = true;
+ int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
- // VE uses FP-based references in general, even when "hasFP" is
- // false. That function is rather a misnomer, because %fp is
- // actually always available, unless isLeafProc.
if (FuncInfo->isLeafProc()) {
// If there's a leaf proc, all offsets need to be %sp-based,
// because we haven't caused %fp to actually point to our frame.
- UseFP = false;
- } else if (isFixed) {
- // Otherwise, argument access should always use %fp.
- UseFP = true;
- } else if (RegInfo->needsStackRealignment(MF)) {
- // If there is dynamic stack realignment, all local object
- // references need to be via %sp, to take account of the
- // re-alignment.
- UseFP = false;
+ FrameReg = VE::SX11; // %sp
+ return FrameOffset + MF.getFrameInfo().getStackSize();
}
-
- int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
-
- if (UseFP) {
- FrameReg = RegInfo->getFrameRegister(MF);
- return FrameOffset;
+ if (RegInfo->needsStackRealignment(MF) && !isFixed) {
+ // If there is dynamic stack realignment, all local object
+ // references need to be via %sp or %s17 (bp), to take account
+ // of the re-alignment.
+ if (hasBP(MF))
+ FrameReg = VE::SX17; // %bp
+ else
+ FrameReg = VE::SX11; // %sp
+ return FrameOffset + MF.getFrameInfo().getStackSize();
}
-
- FrameReg = VE::SX11; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+ // Finally, default to using %fp.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FrameOffset;
}
bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
index de0227e613bc..b548d663c504 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -28,18 +28,23 @@ class VEFrameLowering : public TargetFrameLowering {
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes,
+ MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
bool RequireFPUpdate) const;
void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes,
+ MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
bool RequireFPUpdate) const;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const override;
+ // VE reserves argument space always for call sites in the function
+ // immediately on entry of the current function.
+ bool hasReservedCallFrame(const MachineFunction &MF) const override {
+ return true;
+ }
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
@@ -58,10 +63,8 @@ class VEFrameLowering : public TargetFrameLowering {
return Offsets;
}
- /// targetHandlesStackFrameRounding - Returns true if the target is
- /// responsible for rounding up the stack frame (probably at emitPrologue
- /// time).
- bool targetHandlesStackFrameRounding() const override { return true; }
+protected:
+ const VESubtarget &STI;
private:
// Returns true if MF is a leaf procedure.
@@ -69,11 +72,12 @@ class VEFrameLowering : public TargetFrameLowering {
// Emits code for adjusting SP in function prologue/epilogue.
void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes) const;
+ MachineBasicBlock::iterator MBBI, int64_t NumBytes,
+ MaybeAlign MayAlign = MaybeAlign()) const;
// Emits code for extending SP in function prologue/epilogue.
void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes) const;
+ MachineBasicBlock::iterator MBBI) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 8c611f7f292c..cbdf861307b3 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -583,6 +583,11 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VAEND, MVT::Other, Expand);
/// } VAARG handling
+ /// Stack {
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ /// } Stack
+
/// Int Ops {
for (MVT IntVT : {MVT::i32, MVT::i64}) {
// VE has no REM or DIVREM operations.
@@ -641,6 +646,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(Lo)
TARGET_NODE_CASE(Hi)
TARGET_NODE_CASE(GETFUNPLT)
+ TARGET_NODE_CASE(GETSTACKTOP)
TARGET_NODE_CASE(GETTLSADDR)
TARGET_NODE_CASE(CALL)
TARGET_NODE_CASE(RET_FLAG)
@@ -860,12 +866,79 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}
+SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Generate following code.
+  // (void)__ve_grow_stack(size);
+ // ret = GETSTACKTOP; // pseudo instruction
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ MaybeAlign Alignment(Op.getConstantOperandVal(2));
+ EVT VT = Node->getValueType(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
+
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ Align StackAlign = TFI.getStackAlign();
+ bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
+
+ // Prepare arguments
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Size;
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Entry);
+ if (NeedsAlign) {
+ Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Entry);
+ }
+ Type *RetTy = Type::getVoidTy(*DAG.getContext());
+
+ EVT PtrVT = Op.getValueType();
+ SDValue Callee;
+ if (NeedsAlign) {
+ Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
+ } else {
+ Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(Chain)
+ .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
+ .setDiscardResult(true);
+ std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
+ Chain = pair.second;
+ SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
+ if (NeedsAlign) {
+ Result = DAG.getNode(ISD::ADD, DL, VT, Result,
+ DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
+ Result = DAG.getNode(ISD::AND, DL, VT, Result,
+ DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
+ }
+ // Chain = Result.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
+
+ SDValue Ops[2] = {Result, Chain};
+ return DAG.getMergeValues(Ops, DL);
+}
+
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("Should not custom lower this!");
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ return lowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index a3ead990bccf..097960f05a83 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -27,8 +27,10 @@ enum NodeType : unsigned {
Hi,
Lo, // Hi/Lo operations, typically on a global address.
- GETFUNPLT, // load function address through %plt insturction
- GETTLSADDR, // load address for TLS access
+  GETFUNPLT,  // load function address through %plt instruction
+ GETTLSADDR, // load address for TLS access
+ GETSTACKTOP, // retrieve address of stack top (first address of
+ // locals and temporaries)
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
@@ -81,6 +83,7 @@ class VETargetLowering : public TargetLowering {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 02a63f4aa365..aa19c6ce0687 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -25,7 +25,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define DEBUG_TYPE "ve"
+#define DEBUG_TYPE "ve-instr-info"
using namespace llvm;
@@ -457,6 +457,9 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent(); // The pseudo instruction is gone now.
return true;
}
+ case VE::GETSTACKTOP: {
+ return expandGetStackTopPseudo(MI);
+ }
}
return false;
}
@@ -464,8 +467,8 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+ const VEInstrInfo &TII = *STI.getInstrInfo();
DebugLoc dl = MBB.findDebugLoc(MI);
// Create following instructions and multiple basic blocks.
@@ -544,3 +547,35 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
MI.eraseFromParent(); // The pseudo instruction is gone now.
return true;
}
+
+bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+ const VEInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB->findDebugLoc(MI);
+
+ // Create following instruction
+ //
+ // dst = %sp + target specific frame + the size of parameter area
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VEFrameLowering &TFL = *STI.getFrameLowering();
+
+  // The VE ABI requires a reserved 176-byte area at the top
+  // of the stack as described in VESubtarget.cpp. So, we adjust it here.
+ unsigned NumBytes = STI.getAdjustedFrameSize(0);
+
+ // Also adds the size of parameter area.
+ if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
+ NumBytes += MFI.getMaxCallFrameSize();
+
+ BuildMI(*MBB, MI, DL, TII.get(VE::LEArii))
+ .addDef(MI.getOperand(0).getReg())
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(NumBytes);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
index 4e28279a6675..7b6662df1d60 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -81,6 +81,7 @@ class VEInstrInfo : public VEGenInstrInfo {
bool expandPostRAPseudo(MachineInstr &MI) const override;
bool expandExtendStackPseudo(MachineInstr &MI) const;
+ bool expandGetStackTopPseudo(MachineInstr &MI) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 87c8015c775b..c7815efb8c71 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -414,6 +414,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// GETSTACKTOP
+def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
//===----------------------------------------------------------------------===//
@@ -1398,6 +1401,14 @@ def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
"# EXTEND STACK GUARD",
[]>;
+// Dynamic stack allocation yields a call to __ve_grow_stack for VE
+// targets. These calls are needed to probe the stack when allocating
+// beyond %s8 (%sl - the stack limit).
+
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+ "# GET STACK TOP",
+ [(set iPTR:$dst, (GetStackTop))]>;
// SETCC pattern matches
//
// CMP %tmp, lhs, rhs ; compare lhs and rhs
diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index b0ddc956d7cc..5783a8df69d2 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -34,12 +34,22 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
const MCPhysReg *
VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SaveList;
+ switch (MF->getFunction().getCallingConv()) {
+ default:
+ return CSR_SaveList;
+ case CallingConv::PreserveAll:
+ return CSR_preserve_all_SaveList;
+ }
}
const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return CSR_RegMask;
+ switch (CC) {
+ default:
+ return CSR_RegMask;
+ case CallingConv::PreserveAll:
+ return CSR_preserve_all_RegMask;
+ }
}
const uint32_t *VERegisterInfo::getNoPreservedMask() const {
diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h
index e9637cc16023..f3a2c206162e 100644
--- a/llvm/lib/Target/VE/VESubtarget.h
+++ b/llvm/lib/Target/VE/VESubtarget.h
@@ -42,7 +42,7 @@ class VESubtarget : public VEGenSubtargetInfo {
const TargetMachine &TM);
const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
+ const VEFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const VERegisterInfo *getRegisterInfo() const override {
diff --git a/llvm/test/CodeGen/VE/alloca.ll b/llvm/test/CodeGen/VE/alloca.ll
new file mode 100644
index 000000000000..a4d349fefd0a
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s1, 0, %s0
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s2, __ve_grow_stack@lo
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s2)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: lea %s2, bar@lo
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: lea.sl %s12, bar@hi(, %s2)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %dyna = alloca i8, i64 %n, align 8
+ call void @bar(i8* %dyna, i64 %n)
+ ret void
+}
diff --git a/llvm/test/CodeGen/VE/alloca_aligned.ll b/llvm/test/CodeGen/VE/alloca_aligned.ll
new file mode 100644
index 000000000000..81cdcb56f792
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca_aligned.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s2, 0, %s0
+; CHECK-NEXT: lea %s0, 15(, %s0)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1)
+; CHECK-NEXT: or %s1, -32, (0)1
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: lea %s0, 31(, %s0)
+; CHECK-NEXT: and %s0, -32, %s0
+; CHECK-NEXT: lea %s1, bar@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, bar@hi(, %s1)
+; CHECK-NEXT: or %s1, 0, %s2
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %dyna = alloca i8, i64 %n, align 32
+ call void @bar(i8* %dyna, i64 %n)
+ ret void
+}
More information about the llvm-commits
mailing list