[llvm] ffad4d7 - [z/OS] Implement prologue and epilogue generation for z/OS target.

Kai Nacke via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 13 14:03:37 PST 2021


Author: Neumann Hon
Date: 2021-12-13T17:03:23-05:00
New Revision: ffad4d777b227f91be04020e2cd86ab38e969e39

URL: https://github.com/llvm/llvm-project/commit/ffad4d777b227f91be04020e2cd86ab38e969e39
DIFF: https://github.com/llvm/llvm-project/commit/ffad4d777b227f91be04020e2cd86ab38e969e39.diff

LOG: [z/OS] Implement prologue and epilogue generation for z/OS target.

This patch adds support for prologue and epilogue generation for
the z/OS target under the XPLINK64 ABI for functions with a stack
size of less than 1048576 bytes (huge stack frames, which need the
stack extension routine, are not yet supported).

Reviewed by: uweigand, Kai

Differential Revision: https://reviews.llvm.org/D114457

Added: 
    

Modified: 
    llvm/lib/Target/SystemZ/SystemZCallingConv.td
    llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
    llvm/lib/Target/SystemZ/SystemZFrameLowering.h
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/test/CodeGen/SystemZ/call-zos-01.ll
    llvm/test/CodeGen/SystemZ/call-zos-vec.ll
    llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 373023effb4a1..a7ea5e1e4bf80 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -166,6 +166,7 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
 // any non-leaf function and restored in the epilogue for use by the
 // return instruction so it functions exactly like a callee-saved register.
 def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+                                                (sequence "R%dD", 4, 4),
                                                 (sequence "F%dD", 15, 8))>;
 
 def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,

diff  --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 2f7cdfcf7bded..638e4e00d028a 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -818,7 +818,7 @@ bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
 }
 
 SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
-    : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
+    : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0,
                            Align(32), /* StackRealignable */ false),
       RegSpillOffsets(-1) {
 
@@ -990,12 +990,183 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
   return true;
 }
 
+bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Restore FPRs in the normal TargetInstrInfo way.
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg))
+      TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+                                &SystemZ::FP64BitRegClass, TRI);
+    if (SystemZ::VR128BitRegClass.contains(Reg))
+      TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+                                &SystemZ::VR128BitRegClass, TRI);
+  }
+
+  // Restore call-saved GPRs (but not call-clobbered varargs, which at
+  // this point might hold return values).
+  SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+  if (RestoreGPRs.LowGPR) {
+    assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset));
+    if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR)
+      // Build an LG/L instruction.
+      BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
+          .addReg(Regs.getStackPointerRegister())
+          .addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)
+          .addReg(0);
+    else {
+      // Build an LMG/LM instruction.
+      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+      // Add the explicit register operands.
+      MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
+      MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
+
+      // Add the address.
+      MIB.addReg(Regs.getStackPointerRegister());
+      MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);
+
+      // Do a second scan adding regs as being defined by instruction
+      for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+        unsigned Reg = CSI[I].getReg();
+        if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
+          MIB.addReg(Reg, RegState::ImplicitDefine);
+      }
+    }
+  }
+
+  return true;
+}
+
 void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
-                                              MachineBasicBlock &MBB) const {}
+                                              MachineBasicBlock &MBB) const {
+  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  MachineInstr *StoreInstr = nullptr;
+  bool HasFP = hasFP(MF);
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc DL;
+  uint64_t Offset = 0;
+
+  // TODO: Support leaf functions; only add size of save+reserved area when
+  // function is non-leaf.
+  MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
+  uint64_t StackSize = MFFrame.getStackSize();
+
+  // FIXME: Implement support for large stack sizes, when the stack extension
+  // routine needs to be called.
+  if (StackSize > 1024 * 1024) {
+    llvm_unreachable("Huge Stack Frame not yet supported on z/OS");
+  }
+
+  if (ZFI->getSpillGPRRegs().LowGPR) {
+    // Skip over the GPR saves.
+    if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
+      const int Operand = 3;
+      // Now we can set the offset for the operation, since now the Stack
+      // has been finalized.
+      Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
+      // Maximum displacement for STMG instruction.
+      if (isInt<20>(Offset - StackSize))
+        Offset -= StackSize;
+      else
+        StoreInstr = &*MBBI;
+      MBBI->getOperand(Operand).setImm(Offset);
+      ++MBBI;
+    } else
+      llvm_unreachable("Couldn't skip over GPR saves");
+  }
+
+  if (StackSize) {
+    MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
+    // Allocate StackSize bytes.
+    int64_t Delta = -int64_t(StackSize);
+
+    // In case the STM(G) instruction also stores SP (R4), but the displacement
+    // is too large, the SP register is manipulated first before storing,
+    // resulting in the wrong value stored and retrieved later. In this case, we
+    // need to temporarily save the value of SP, and store it later to memory.
+    if (StoreInstr && HasFP) {
+      // Insert LGR r0,r4 before STMG instruction.
+      BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::LGR))
+          .addReg(SystemZ::R0D, RegState::Define)
+          .addReg(SystemZ::R4D);
+      // Insert STG r0,xxx(,r4) after STMG instruction.
+      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG), SystemZ::R0D)
+          .addReg(SystemZ::R4D)
+          .addImm(Offset)
+          .addReg(0);
+    }
+
+    emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
+                  ZII);
+  }
+
+  if (HasFP) {
+    // Copy the base of the frame to Frame Pointer Register.
+    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR),
+            Regs.getFramePointerRegister())
+        .addReg(Regs.getStackPointerRegister());
+
+    // Mark the FramePtr as live at the beginning of every block except
+    // the entry block.  (We'll have marked R8 as live on entry when
+    // saving the GPRs.)
+    for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
+      I->addLiveIn(Regs.getFramePointerRegister());
+  }
+}
 
 void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
-                                              MachineBasicBlock &MBB) const {}
+                                              MachineBasicBlock &MBB) const {
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  // Skip the return instruction.
+  assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
+
+  uint64_t StackSize = MFFrame.getStackSize();
+  if (StackSize) {
+    unsigned SPReg = Regs.getStackPointerRegister();
+    if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) {
+      DebugLoc DL = MBBI->getDebugLoc();
+      emitIncrement(MBB, MBBI, DL, SPReg, StackSize, ZII);
+    }
+  }
+}
 
 bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
-  return false;
+  return (MF.getFrameInfo().hasVarSizedObjects());
+}
+
+void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
+    MachineFunction &MF, RegScavenger *RS) const {
+  MachineFrameInfo &MFFrame = MF.getFrameInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+  // Setup stack frame offset
+  MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
 }

diff  --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index af219da79c328..106b9e8ebe065 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -115,11 +115,20 @@ class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
                                  ArrayRef<CalleeSavedInfo> CSI,
                                  const TargetRegisterInfo *TRI) const override;
 
+  bool
+  restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBII,
+                              MutableArrayRef<CalleeSavedInfo> CSI,
+                              const TargetRegisterInfo *TRI) const override;
+
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
   bool hasFP(const MachineFunction &MF) const override;
+
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+                                           RegScavenger *RS) const override;
 };
 } // end namespace llvm
 

diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index a5e6c906453f3..24de528507713 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1500,8 +1500,16 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
       assert(VA.isMemLoc() && "Argument not register or memory");
 
       // Create the frame index object for this incoming parameter.
-      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
-                                     VA.getLocMemOffset(), true);
+      // FIXME: Pre-include call frame size in the offset, should not
+      // need to manually add it here.
+      int64_t ArgSPOffset = VA.getLocMemOffset();
+      if (Subtarget.isTargetXPLINK64()) {
+        auto &XPRegs =
+            Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+        ArgSPOffset += XPRegs.getCallFrameSize();
+      }
+      int FI =
+          MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
 
       // Create the SelectionDAG nodes corresponding to a load
       // from this parameter.  Unpromoted ints and floats are

diff  --git a/llvm/test/CodeGen/SystemZ/call-zos-01.ll b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
index 7194d09cba16d..1d1a4ec8a4156 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-01.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
@@ -86,7 +86,7 @@ entry:
 }
 
 ; CHECK-LABEL: pass_integrals0:
-; CHECK: ag  2, -{{[0-9]+}}(4)
+; CHECK: ag  2, 2328(4)
 ; CHECK-NEXT: lgr 3, 2
 define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
 entry:

diff  --git a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
index 8d6b93387330f..3571346c23698 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
@@ -14,7 +14,7 @@ entry:
 ; CHECK: vaf 1, 1, 27
 ; CHECK: vaf 1, 1, 28
 ; CHECK: vaf 1, 1, 29
-; CHECK: vl  0, 32(4), 4
+; CHECK: vl  0, 2432(4), 4
 ; CHECK: vaf 1, 1, 30
 ; CHECK: vaf 1, 1, 31
 ; CHECK: vaf 24, 1, 0

diff  --git a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
index 4934fee8410cc..d4196ac7835e2 100644
--- a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
+++ b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -6,7 +6,14 @@
 
 ; Small stack frame.
 ; CHECK-LABEL: func0
-; CHECK64: stmg  6, 7
+; CHECK64: stmg  6, 7, 1872(4)
+; stmg instruction's displacement field must be 2064-dsa_size
+; as per ABI
+; CHECK64: aghi  4, -192
+
+; CHECK64: lg  7, 2072(4)
+; CHECK64: aghi  4, 192
+; CHECK64: b 2(7)
 define void @func0() {
   call i64 (i64) @fun(i64 10)
   ret void
@@ -14,7 +21,12 @@ define void @func0() {
 
 ; Spill all GPR CSRs
 ; CHECK-LABEL: func1
-; CHECK64: stmg 6, 15
+; CHECK64: stmg 6, 15, 1904(4)
+; CHECK64: aghi  4, -160
+
+; CHECK64: lmg 7, 15, 2072(4)
+; CHECK64: aghi  4, 160
+; CHECK64: b 2(7)
 define void @func1(i64 *%ptr) {
   %l01 = load volatile i64, i64 *%ptr
   %l02 = load volatile i64, i64 *%ptr
@@ -67,6 +79,8 @@ define void @func1(i64 *%ptr) {
 
 ; Spill all FPRs and VRs
 ; CHECK-LABEL: func2
+; CHECK64: stmg	6, 7, 1744(4)
+; CHECK64: aghi  4, -320
 ; CHECK64: std	15, {{[0-9]+}}(4)                      * 8-byte Folded Spill
 ; CHECK64: std	14, {{[0-9]+}}(4)                      * 8-byte Folded Spill
 ; CHECK64: std	13, {{[0-9]+}}(4)                      * 8-byte Folded Spill
@@ -83,6 +97,27 @@ define void @func1(i64 *%ptr) {
 ; CHECK64: vst	18, {{[0-9]+}}(4), 4                   * 16-byte Folded Spill
 ; CHECK64: vst	17, {{[0-9]+}}(4), 4                   * 16-byte Folded Spill
 ; CHECK64: vst	16, {{[0-9]+}}(4), 4                   * 16-byte Folded Spill
+
+; CHECK64: ld	15, {{[0-9]+}}(4)                      * 8-byte Folded Reload
+; CHECK64: ld	14, {{[0-9]+}}(4)                      * 8-byte Folded Reload
+; CHECK64: ld	13, {{[0-9]+}}(4)                      * 8-byte Folded Reload
+; CHECK64: ld	12, {{[0-9]+}}(4)                      * 8-byte Folded Reload
+; CHECK64: ld	11, {{[0-9]+}}(4)                      * 8-byte Folded Reload
+; CHECK64: ld	10, {{[0-9]+}}(4)                      * 8-byte Folded Reload
+; CHECK64: ld	9, {{[0-9]+}}(4)                       * 8-byte Folded Reload
+; CHECK64: ld	8, {{[0-9]+}}(4)                       * 8-byte Folded Reload
+; CHECK64: vl	23, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	22, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	21, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	20, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	19, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	18, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	17, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: vl	16, {{[0-9]+}}(4), 4                   * 16-byte Folded Reload
+; CHECK64: lg  7, 2072(4)
+; CHECK64: aghi  4, 320
+; CHECK64: b 2(7)
+
 define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
   %l00 = load volatile double, double *%ptr
   %l01 = load volatile double, double *%ptr
@@ -232,5 +267,43 @@ define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
   ret void
 }
 
-declare i64 @fun(i64 %arg0)
+; Big stack frame, force the use of agfi before stmg
+; despite not requiring stack extension routine.
+; CHECK64: agfi  4, -1040768
+; CHECK64: stmg  6, 7, 2064(4)
+; CHECK64: agfi  4, 1040768
+define void @func3() {
+  %arr = alloca [130070 x i64], align 8
+  %ptr = bitcast [130070 x i64]* %arr to i8*
+  call i64 (i8*) @fun1(i8* %ptr)
+  ret void
+}
+
+; Requires the saving of r4 due to variable sized
+; object in stack frame. (Eg: VLA)
+; CHECK64: stmg  4, 8, 1856(4)
+; CHECK64: aghi  4, -192
+; CHECK64: lmg	4, 8, 2048(4)
+define i64 @func4(i64 %n) {
+  %vla = alloca i64, i64 %n, align 8
+  %call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* nonnull %vla)
+  ret i64 %call
+}
 
+; Require saving of r4 and in addition, a displacement large enough
+; to force use of agfi before stmg.
+; CHECK64: lgr	0, 4
+; CHECK64: agfi	4, -1040192
+; CHECK64: stmg  4, 8, 2048(4)
+; CHECK64: lmg 4, 8, 2048(4)
+define i64 @func5(i64 %n) {
+  %vla = alloca i64, i64 %n, align 8
+  %arr = alloca [130000 x i64], align 8
+  %ptr = bitcast [130000 x i64]* %arr to i64*
+  %call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* %ptr)
+  ret i64 %call
+}
+
+declare i64 @fun(i64 %arg0)
+declare i64 @fun1(i8* %ptr)
+declare i64 @fun2(i64 %n, i64* %arr0, i64* %arr1)


        


More information about the llvm-commits mailing list