[llvm] dedaf3a - [VE] Dynamic stack allocation

Simon Moll via llvm-commits llvm-commits at lists.llvm.org
Wed May 27 01:16:37 PDT 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-05-27T10:11:06+02:00
New Revision: dedaf3a2ac59548c70a0d54da7267bbb082782c0

URL: https://github.com/llvm/llvm-project/commit/dedaf3a2ac59548c70a0d54da7267bbb082782c0
DIFF: https://github.com/llvm/llvm-project/commit/dedaf3a2ac59548c70a0d54da7267bbb082782c0.diff

LOG: [VE] Dynamic stack allocation

Summary:
This patch implements dynamic stack allocation for the VE target. Changes:
* compiler-rt: `__ve_grow_stack` to request stack allocation on the VE.
* VE: base pointer support, dynamic stack allocation.

Differential Revision: https://reviews.llvm.org/D79084

Added: 
    compiler-rt/lib/builtins/ve/grow_stack.S
    compiler-rt/lib/builtins/ve/grow_stack_align.S
    llvm/test/CodeGen/VE/alloca.ll
    llvm/test/CodeGen/VE/alloca_aligned.ll

Modified: 
    compiler-rt/cmake/Modules/CompilerRTUtils.cmake
    compiler-rt/cmake/base-config-ix.cmake
    compiler-rt/cmake/builtin-config-ix.cmake
    compiler-rt/lib/builtins/CMakeLists.txt
    llvm/lib/Target/VE/VECallingConv.td
    llvm/lib/Target/VE/VEFrameLowering.cpp
    llvm/lib/Target/VE/VEFrameLowering.h
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.cpp
    llvm/lib/Target/VE/VEInstrInfo.h
    llvm/lib/Target/VE/VEInstrInfo.td
    llvm/lib/Target/VE/VERegisterInfo.cpp
    llvm/lib/Target/VE/VESubtarget.h

Removed: 
    


################################################################################
diff  --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index a83e916990d7..0a686e38ff88 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -166,6 +166,7 @@ macro(detect_target_arch)
   check_symbol_exists(__sparcv9 "" __SPARCV9)
   check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
   check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
+  check_symbol_exists(__ve__ "" __VE)
   if(__ARM)
     add_default_target_arch(arm)
   elseif(__AARCH64)
@@ -200,6 +201,8 @@ macro(detect_target_arch)
     add_default_target_arch(wasm32)
   elseif(__WEBASSEMBLY64)
     add_default_target_arch(wasm64)
+  elseif(__VE)
+    add_default_target_arch(ve)
   endif()
 endmacro()
 

diff  --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 234cd7262b72..964dd598f102 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -237,6 +237,8 @@ macro(test_targets)
       test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
       test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve")
+      test_target_arch(ve "__ve__" "--target=ve-unknown-none")
     endif()
     set(COMPILER_RT_OS_SUFFIX "")
   endif()

diff  --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 1bd7ad46df44..5f4275ae54d4 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -37,6 +37,7 @@ set(SPARC sparc)
 set(SPARCV9 sparcv9)
 set(WASM32 wasm32)
 set(WASM64 wasm64)
+set(VE ve)
 
 if(APPLE)
   set(ARM64 arm64 arm64e)
@@ -44,8 +45,11 @@ if(APPLE)
   set(X86_64 x86_64 x86_64h)
 endif()
 
-set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
-    ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} ${WASM32} ${WASM64})
+set(ALL_BUILTIN_SUPPORTED_ARCH
+  ${X86} ${X86_64} ${ARM32} ${ARM64}
+  ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64}
+  ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
+  ${WASM32} ${WASM64} ${VE})
 
 include(CompilerRTUtils)
 include(CompilerRTDarwinUtils)

diff  --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index f63f06c3bfa2..f8431bdcf059 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -573,6 +573,12 @@ set(wasm64_SOURCES
   ${GENERIC_SOURCES}
 )
 
+set(ve_SOURCES
+  ve/grow_stack.S
+  ve/grow_stack_align.S
+  ${GENERIC_TF_SOURCES}
+  ${GENERIC_SOURCES})
+
 add_custom_target(builtins)
 set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")
 

diff  --git a/compiler-rt/lib/builtins/ve/grow_stack.S b/compiler-rt/lib/builtins/ve/grow_stack.S
new file mode 100644
index 000000000000..f403798495af
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroy %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align        4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack)
+        subu.l          %sp, %sp, %s0           # sp -= alloca size
+        and             %sp, -16, %sp           # align sp
+        brge.l.t        %sp, %sl, 1f
+        ld              %s63, 0x18(,%tp)        # load param area
+        lea             %s62, 0x13b             # syscall # of grow
+        shm.l           %s62, 0x0(%s63)         # stored at addr:0
+        shm.l           %sl, 0x8(%s63)          # old limit at addr:8
+        shm.l           %sp, 0x10(%s63)         # new limit at addr:16
+        monc
+1:
+        b.l             (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack)
+
+#endif // __ve__

diff  --git a/compiler-rt/lib/builtins/ve/grow_stack_align.S b/compiler-rt/lib/builtins/ve/grow_stack_align.S
new file mode 100644
index 000000000000..19a1dfa8726c
--- /dev/null
+++ b/compiler-rt/lib/builtins/ve/grow_stack_align.S
@@ -0,0 +1,31 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// grow_stack routine
+// This routine is VE specific
+// https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v1.1.pdf
+
+// destroy %s62 and %s63 only
+
+#ifdef __ve__
+
+.text
+.p2align        4
+DEFINE_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+        subu.l          %sp, %sp, %s0           # sp -= alloca size
+        and             %sp, %sp, %s1           # align sp
+        brge.l.t        %sp, %sl, 1f
+        ld              %s63, 0x18(,%tp)        # load param area
+        lea             %s62, 0x13b             # syscall # of grow
+        shm.l           %s62, 0x0(%s63)         # stored at addr:0
+        shm.l           %sl, 0x8(%s63)          # old limit at addr:8
+        shm.l           %sp, 0x10(%s63)         # new limit at addr:16
+        monc
+1:
+        b.l             (,%lr)
+END_COMPILERRT_FUNCTION(__ve_grow_stack_align)
+
+#endif // __ve__

diff  --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
index 5c32962658bb..4f04dae884ab 100644
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -84,3 +84,6 @@ def RetCC_VE : CallingConv<[
 // Callee-saved registers
 def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+// PreserveAll (clobbers s62,s63) - used for ve_grow_stack
+def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;

diff  --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
index e6cd56285198..8b10e6466123 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -30,12 +30,13 @@ using namespace llvm;
 
 VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
-                          Align(16)) {}
+                          Align(16)),
+      STI(ST) {}
 
 void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        int NumBytes,
+                                        uint64_t NumBytes,
                                         bool RequireFPUpdate) const {
 
   DebugLoc dl;
@@ -47,6 +48,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
   //    st %lr, 8(,%sp)
   //    st %got, 24(,%sp)
   //    st %plt, 32(,%sp)
+  //    st %s17, 40(,%sp) iff this function is using s17 as BP
   //    or %fp, 0, %sp
 
   BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
@@ -69,6 +71,12 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
       .addImm(0)
       .addImm(32)
       .addReg(VE::SX16);
+  if (hasBP(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+        .addReg(VE::SX11)
+        .addImm(0)
+        .addImm(40)
+        .addReg(VE::SX17);
   BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
       .addReg(VE::SX11)
       .addImm(0);
@@ -77,7 +85,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
 void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        int NumBytes,
+                                        uint64_t NumBytes,
                                         bool RequireFPUpdate) const {
 
   DebugLoc dl;
@@ -86,6 +94,7 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
   // Insert following codes here as epilogue
   //
   //    or %sp, 0, %fp
+  //    ld %s17, 40(,%sp) iff this function is using s17 as BP
   //    ld %got, 32(,%sp)
   //    ld %plt, 24(,%sp)
   //    ld %lr, 8(,%sp)
@@ -94,6 +103,11 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
   BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
       .addReg(VE::SX9)
       .addImm(0);
+  if (hasBP(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+        .addReg(VE::SX11)
+        .addImm(0)
+        .addImm(40);
   BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
       .addReg(VE::SX11)
       .addImm(0)
@@ -115,7 +129,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
 void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
-                                       int NumBytes) const {
+                                       int64_t NumBytes,
+                                       MaybeAlign MaybeAlign) const {
   DebugLoc dl;
   const VEInstrInfo &TII =
       *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -143,11 +158,17 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
       .addReg(VE::SX11)
       .addReg(VE::SX13)
       .addImm(Hi_32(NumBytes));
+
+  if (MaybeAlign) {
+    // and %sp, %sp, Align-1
+    BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+        .addReg(VE::SX11)
+        .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
+  }
 }
 
 void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   int NumBytes) const {
+                                   MachineBasicBlock::iterator MBBI) const {
   DebugLoc dl;
   const VEInstrInfo &TII =
       *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -186,11 +207,8 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
-  const VEInstrInfo &TII =
-      *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
-  const VERegisterInfo &RegInfo =
-      *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
+  const VEInstrInfo &TII = *STI.getInstrInfo();
+  const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -209,30 +227,15 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
                        "(probably because it has a dynamic alloca).");
 
   // Get the number of bytes to allocate from the FrameInfo
-  int NumBytes = (int)MFI.getStackSize();
-  // The VE ABI requires a reserved 176-byte area in the user's stack, starting
-  // at %sp + 16. This is for the callee Register Save Area (RSA).
-  //
-  // We therefore need to add that offset to the total stack size
-  // after all the stack objects are placed by
-  // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
-  // needs to be aligned *after* the extra size is added, we need to disable
-  // calculateFrameObjectOffsets's built-in stack alignment, by having
-  // targetHandlesStackFrameRounding return true.
-
-  // Add the extra call frame stack size, if needed. (This is the same
-  // code as in PrologEpilogInserter, but also gets disabled by
-  // targetHandlesStackFrameRounding)
-  if (MFI.adjustsStack() && hasReservedCallFrame(MF))
-    NumBytes += MFI.getMaxCallFrameSize();
-
-  // Adds the VE subtarget-specific spill area to the stack
-  // size. Also ensures target-required alignment.
-  NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
+  uint64_t NumBytes = MFI.getStackSize();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the
+  // stack, as described in VESubtarget.cpp, so we adjust for it here.
+  NumBytes = STI.getAdjustedFrameSize(NumBytes);
 
   // Finally, ensure that the size is sufficiently aligned for the
   // data on the stack.
-  NumBytes = alignTo(NumBytes, MFI.getMaxAlign().value());
+  NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
 
   // Update stack size with corrected value.
   MFI.setStackSize(NumBytes);
@@ -241,16 +244,25 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
   emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
 
   // Emit stack adjust instructions
-  emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
+  MaybeAlign RuntimeAlign =
+      NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+  emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
+
+  if (hasBP(MF)) {
+    // Copy SP to BP.
+    BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+        .addReg(VE::SX11)
+        .addImm(0);
+  }
 
   // Emit stack extend instructions
-  emitSPExtend(MF, MBB, MBBI, -NumBytes);
+  emitSPExtend(MF, MBB, MBBI);
 
-  unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
+  Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
 
   // Emit ".cfi_def_cfa_register 30".
   unsigned CFIIndex =
-      MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+      MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
   BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
       .addCFIIndex(CFIIndex);
 
@@ -265,7 +277,7 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
     MachineBasicBlock::iterator I) const {
   if (!hasReservedCallFrame(MF)) {
     MachineInstr &MI = *I;
-    int Size = MI.getOperand(0).getImm();
+    int64_t Size = MI.getOperand(0).getImm();
     if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
       Size = -Size;
 
@@ -281,20 +293,17 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF,
   DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
-  int NumBytes = (int)MFI.getStackSize();
+  uint64_t NumBytes = MFI.getStackSize();
 
   // Emit Epilogue instructions to restore %lr
   emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
 }
 
-bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  // Reserve call frame if there are no variable sized objects on the stack.
-  return !MF.getFrameInfo().hasVarSizedObjects();
-}
-
 // hasFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
+// pointer register.  This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled.  For the case of VE, we don't
+// implement FP elimination yet, but we return false from this function so
+// that the generated code does not refer to fp.
 bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
@@ -304,44 +313,41 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
          MFI.isFrameAddressTaken();
 }
 
+bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+  return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+}
+
 int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             Register &FrameReg) const {
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const VERegisterInfo *RegInfo = STI.getRegisterInfo();
   const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
   bool isFixed = MFI.isFixedObjectIndex(FI);
 
-  // Addressable stack objects are accessed using neg. offsets from
-  // %fp, or positive offsets from %sp.
-  bool UseFP = true;
+  int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
 
-  // VE uses FP-based references in general, even when "hasFP" is
-  // false. That function is rather a misnomer, because %fp is
-  // actually always available, unless isLeafProc.
   if (FuncInfo->isLeafProc()) {
     // If there's a leaf proc, all offsets need to be %sp-based,
     // because we haven't caused %fp to actually point to our frame.
-    UseFP = false;
-  } else if (isFixed) {
-    // Otherwise, argument access should always use %fp.
-    UseFP = true;
-  } else if (RegInfo->needsStackRealignment(MF)) {
-    // If there is dynamic stack realignment, all local object
-    // references need to be via %sp, to take account of the
-    // re-alignment.
-    UseFP = false;
+    FrameReg = VE::SX11; // %sp
+    return FrameOffset + MF.getFrameInfo().getStackSize();
   }
-
-  int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
-
-  if (UseFP) {
-    FrameReg = RegInfo->getFrameRegister(MF);
-    return FrameOffset;
+  if (RegInfo->needsStackRealignment(MF) && !isFixed) {
+    // If there is dynamic stack realignment, all local object
+    // references need to be via %sp or %s17 (bp), to take account
+    // of the re-alignment.
+    if (hasBP(MF))
+      FrameReg = VE::SX17; // %bp
+    else
+      FrameReg = VE::SX11; // %sp
+    return FrameOffset + MF.getFrameInfo().getStackSize();
   }
-
-  FrameReg = VE::SX11; // %sp
-  return FrameOffset + MF.getFrameInfo().getStackSize();
+  // Finally, default to using %fp.
+  FrameReg = RegInfo->getFrameRegister(MF);
+  return FrameOffset;
 }
 
 bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
index de0227e613bc..b548d663c504 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -28,18 +28,23 @@ class VEFrameLowering : public TargetFrameLowering {
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
                          bool RequireFPUpdate) const;
   void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
                          bool RequireFPUpdate) const;
 
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I) const override;
 
-  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  bool hasBP(const MachineFunction &MF) const;
   bool hasFP(const MachineFunction &MF) const override;
+  // VE always reserves argument space for call sites in the function
+  // immediately on entry to the current function.
+  bool hasReservedCallFrame(const MachineFunction &MF) const override {
+    return true;
+  }
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS = nullptr) const override;
 
@@ -58,10 +63,8 @@ class VEFrameLowering : public TargetFrameLowering {
     return Offsets;
   }
 
-  /// targetHandlesStackFrameRounding - Returns true if the target is
-  /// responsible for rounding up the stack frame (probably at emitPrologue
-  /// time).
-  bool targetHandlesStackFrameRounding() const override { return true; }
+protected:
+  const VESubtarget &STI;
 
 private:
   // Returns true if MF is a leaf procedure.
@@ -69,11 +72,12 @@ class VEFrameLowering : public TargetFrameLowering {
 
   // Emits code for adjusting SP in function prologue/epilogue.
   void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator MBBI, int NumBytes) const;
+                        MachineBasicBlock::iterator MBBI, int64_t NumBytes,
+                        MaybeAlign MayAlign = MaybeAlign()) const;
 
   // Emits code for extending SP in function prologue/epilogue.
   void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
-                    MachineBasicBlock::iterator MBBI, int NumBytes) const;
+                    MachineBasicBlock::iterator MBBI) const;
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 8c611f7f292c..cbdf861307b3 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -583,6 +583,11 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
   /// } VAARG handling
 
+  /// Stack {
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+  /// } Stack
+
   /// Int Ops {
   for (MVT IntVT : {MVT::i32, MVT::i64}) {
     // VE has no REM or DIVREM operations.
@@ -641,6 +646,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     TARGET_NODE_CASE(Lo)
     TARGET_NODE_CASE(Hi)
     TARGET_NODE_CASE(GETFUNPLT)
+    TARGET_NODE_CASE(GETSTACKTOP)
     TARGET_NODE_CASE(GETTLSADDR)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
@@ -860,12 +866,79 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
                      std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
 }
 
+SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  // Generate following code.
+  //   (void)__ve_grow_stack(size);
+  //   ret = GETSTACKTOP;        // pseudo instruction
+  SDLoc DL(Op);
+
+  // Get the inputs.
+  SDNode *Node = Op.getNode();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  MaybeAlign Alignment(Op.getConstantOperandVal(2));
+  EVT VT = Node->getValueType(0);
+
+  // Chain the dynamic stack allocation so that it doesn't modify the stack
+  // pointer when other instructions are using the stack.
+  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
+
+  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+  Align StackAlign = TFI.getStackAlign();
+  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
+
+  // Prepare arguments
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Size;
+  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+  Args.push_back(Entry);
+  if (NeedsAlign) {
+    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
+    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Args.push_back(Entry);
+  }
+  Type *RetTy = Type::getVoidTy(*DAG.getContext());
+
+  EVT PtrVT = Op.getValueType();
+  SDValue Callee;
+  if (NeedsAlign) {
+    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
+  } else {
+    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
+  }
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL)
+      .setChain(Chain)
+      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
+      .setDiscardResult(true);
+  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
+  Chain = pair.second;
+  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
+  if (NeedsAlign) {
+    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
+                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
+    Result = DAG.getNode(ISD::AND, DL, VT, Result,
+                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
+  }
+  //  Chain = Result.getValue(1);
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
+                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
+
+  SDValue Ops[2] = {Result, Chain};
+  return DAG.getMergeValues(Ops, DL);
+}
+
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Should not custom lower this!");
   case ISD::BlockAddress:
     return LowerBlockAddress(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC:
+    return lowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress:

diff  --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index a3ead990bccf..097960f05a83 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -27,8 +27,10 @@ enum NodeType : unsigned {
   Hi,
   Lo, // Hi/Lo operations, typically on a global address.
 
-  GETFUNPLT,  // load function address through %plt insturction
-  GETTLSADDR, // load address for TLS access
+  GETFUNPLT,   // load function address through %plt instruction
+  GETTLSADDR,  // load address for TLS access
+  GETSTACKTOP, // retrieve address of stack top (first address of
+               // locals and temporaries)
 
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
@@ -81,6 +83,7 @@ class VETargetLowering : public TargetLowering {
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
   SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 02a63f4aa365..aa19c6ce0687 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
-#define DEBUG_TYPE "ve"
+#define DEBUG_TYPE "ve-instr-info"
 
 using namespace llvm;
 
@@ -457,6 +457,9 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.eraseFromParent(); // The pseudo instruction is gone now.
     return true;
   }
+  case VE::GETSTACKTOP: {
+    return expandGetStackTopPseudo(MI);
+  }
   }
   return false;
 }
@@ -464,8 +467,8 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const VEInstrInfo &TII =
-      *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+  const VEInstrInfo &TII = *STI.getInstrInfo();
   DebugLoc dl = MBB.findDebugLoc(MI);
 
   // Create following instructions and multiple basic blocks.
@@ -544,3 +547,35 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return true;
 }
+
+bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+  const VEInstrInfo &TII = *STI.getInstrInfo();
+  DebugLoc DL = MBB->findDebugLoc(MI);
+
+  // Create following instruction
+  //
+  //   dst = %sp + target specific frame + the size of parameter area
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const VEFrameLowering &TFL = *STI.getFrameLowering();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the
+  // stack, as described in VESubtarget.cpp, so we adjust for it here.
+  unsigned NumBytes = STI.getAdjustedFrameSize(0);
+
+  // Also adds the size of parameter area.
+  if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
+    NumBytes += MFI.getMaxCallFrameSize();
+
+  BuildMI(*MBB, MI, DL, TII.get(VE::LEArii))
+      .addDef(MI.getOperand(0).getReg())
+      .addReg(VE::SX11)
+      .addImm(0)
+      .addImm(NumBytes);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return true;
+}

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
index 4e28279a6675..7b6662df1d60 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -81,6 +81,7 @@ class VEInstrInfo : public VEGenInstrInfo {
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
   bool expandExtendStackPseudo(MachineInstr &MI) const;
+  bool expandGetStackTopPseudo(MachineInstr &MI) const;
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 87c8015c775b..c7815efb8c71 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -414,6 +414,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                          SDNPVariadic]>;
 
+// GETSTACKTOP
+def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
+                        [SDNPHasChain, SDNPSideEffect]>;
 
 
 //===----------------------------------------------------------------------===//
@@ -1398,6 +1401,14 @@ def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
                                 "# EXTEND STACK GUARD",
                                 []>;
 
+// Dynamic stack allocation yields a call to __ve_grow_stack (or
+// __ve_grow_stack_align) for VE targets.  These calls are needed to probe
+// the stack when allocating more than %s8 (%sl, the stack limit) allows.
+
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+                         "# GET STACK TOP",
+                         [(set iPTR:$dst, (GetStackTop))]>;
 // SETCC pattern matches
 //
 //   CMP  %tmp, lhs, rhs     ; compare lhs and rhs

diff  --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index b0ddc956d7cc..5783a8df69d2 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -34,12 +34,22 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
 
 const MCPhysReg *
 VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  return CSR_SaveList;
+  switch (MF->getFunction().getCallingConv()) {
+  default:
+    return CSR_SaveList;
+  case CallingConv::PreserveAll:
+    return CSR_preserve_all_SaveList;
+  }
 }
 
 const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                      CallingConv::ID CC) const {
-  return CSR_RegMask;
+  switch (CC) {
+  default:
+    return CSR_RegMask;
+  case CallingConv::PreserveAll:
+    return CSR_preserve_all_RegMask;
+  }
 }
 
 const uint32_t *VERegisterInfo::getNoPreservedMask() const {

diff  --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h
index e9637cc16023..f3a2c206162e 100644
--- a/llvm/lib/Target/VE/VESubtarget.h
+++ b/llvm/lib/Target/VE/VESubtarget.h
@@ -42,7 +42,7 @@ class VESubtarget : public VEGenSubtargetInfo {
               const TargetMachine &TM);
 
   const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
-  const TargetFrameLowering *getFrameLowering() const override {
+  const VEFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
   const VERegisterInfo *getRegisterInfo() const override {

diff  --git a/llvm/test/CodeGen/VE/alloca.ll b/llvm/test/CodeGen/VE/alloca.ll
new file mode 100644
index 000000000000..a4d349fefd0a
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
+; CHECK-NEXT:    lea %s2, __ve_grow_stack at lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack at hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lea %s0, 240(, %s11)
+; CHECK-NEXT:    lea %s2, bar at lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s12, bar at hi(, %s2)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %dyna = alloca i8, i64 %n, align 8
+  call void @bar(i8* %dyna, i64 %n)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/VE/alloca_aligned.ll b/llvm/test/CodeGen/VE/alloca_aligned.ll
new file mode 100644
index 000000000000..81cdcb56f792
--- /dev/null
+++ b/llvm/test/CodeGen/VE/alloca_aligned.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+declare void @bar(i8*, i64)
+
+; Function Attrs: nounwind
+define void @test(i64 %n) {
+; CHECK-LABEL: test:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s2, 0, %s0
+; CHECK-NEXT:    lea %s0, 15(, %s0)
+; CHECK-NEXT:    and %s0, -16, %s0
+; CHECK-NEXT:    lea %s1, __ve_grow_stack_align at lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack_align at hi(, %s1)
+; CHECK-NEXT:    or %s1, -32, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lea %s0, 240(, %s11)
+; CHECK-NEXT:    lea %s0, 31(, %s0)
+; CHECK-NEXT:    and %s0, -32, %s0
+; CHECK-NEXT:    lea %s1, bar at lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s12, bar at hi(, %s1)
+; CHECK-NEXT:    or %s1, 0, %s2
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %dyna = alloca i8, i64 %n, align 32
+  call void @bar(i8* %dyna, i64 %n)
+  ret void
+}


        


More information about the llvm-commits mailing list