[llvm] ffad4d7 - [z/OS] Implement prologue and epilogue generation for z/OS target.
Kai Nacke via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 13 14:03:37 PST 2021
Author: Neumann Hon
Date: 2021-12-13T17:03:23-05:00
New Revision: ffad4d777b227f91be04020e2cd86ab38e969e39
URL: https://github.com/llvm/llvm-project/commit/ffad4d777b227f91be04020e2cd86ab38e969e39
DIFF: https://github.com/llvm/llvm-project/commit/ffad4d777b227f91be04020e2cd86ab38e969e39.diff
LOG: [z/OS] Implement prologue and epilogue generation for z/OS target.
This patch adds support for prologue and epilogue generation for
the z/OS target under the XPLINK64 ABI, for functions with a stack
size of less than 1048576 bytes; huge stack frames are not yet supported.
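As an illustration, the small-frame case covered by the updated
zos-prologue-epilog.ll test (func0, reproduced below with its 192-byte
frame) is now expected to produce roughly the following prologue and
epilogue; the concrete offsets are taken from the test's CHECK64 lines
and assume the biased stack pointer in r4:

  define void @func0() {
    call i64 (i64) @fun(i64 10)
    ret void
  }
  declare i64 @fun(i64 %arg0)

  ; Prologue: save the GPRs at 2064 - dsa_size (= 1872) relative to the
  ; biased stack pointer, then allocate the 192-byte frame.
  ;   stmg 6, 7, 1872(4)
  ;   aghi 4, -192
  ; Epilogue: reload the return register, deallocate, and branch back.
  ;   lg   7, 2072(4)
  ;   aghi 4, 192
  ;   b    2(7)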
Reviewed by: uweigand, Kai
Differential Revision: https://reviews.llvm.org/D114457
Added:
Modified:
llvm/lib/Target/SystemZ/SystemZCallingConv.td
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
llvm/lib/Target/SystemZ/SystemZFrameLowering.h
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/test/CodeGen/SystemZ/call-zos-01.ll
llvm/test/CodeGen/SystemZ/call-zos-vec.ll
llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 373023effb4a1..a7ea5e1e4bf80 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -166,6 +166,7 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
// any non-leaf function and restored in the epilogue for use by the
// return instruction so it functions exactly like a callee-saved register.
def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+ (sequence "R%dD", 4, 4),
(sequence "F%dD", 15, 8))>;
def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 2f7cdfcf7bded..638e4e00d028a 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -818,7 +818,7 @@ bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
}
SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
- : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
+ : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0,
Align(32), /* StackRealignable */ false),
RegSpillOffsets(-1) {
@@ -990,12 +990,183 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
return true;
}
+bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Restore FPRs in the normal TargetInstrInfo way.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg))
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ if (SystemZ::VR128BitRegClass.contains(Reg))
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ &SystemZ::VR128BitRegClass, TRI);
+ }
+
+ // Restore call-saved GPRs (but not call-clobbered varargs, which at
+ // this point might hold return values).
+ SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+ if (RestoreGPRs.LowGPR) {
+ assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset));
+ if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR)
+ // Build an LG/L instruction.
+ BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
+ .addReg(Regs.getStackPointerRegister())
+ .addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)
+ .addReg(0);
+ else {
+ // Build an LMG/LM instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+ // Add the explicit register operands.
+ MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
+ MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
+
+ // Add the address.
+ MIB.addReg(Regs.getStackPointerRegister());
+ MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);
+
+ // Do a second scan adding regs as being defined by instruction
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+ }
+
+ return true;
+}
+
void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ MachineInstr *StoreInstr = nullptr;
+ bool HasFP = hasFP(MF);
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+ uint64_t Offset = 0;
+
+ // TODO: Support leaf functions; only add size of save+reserved area when
+ // function is non-leaf.
+ MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
+ uint64_t StackSize = MFFrame.getStackSize();
+
+ // FIXME: Implement support for large stack sizes, when the stack extension
+ // routine needs to be called.
+ if (StackSize > 1024 * 1024) {
+ llvm_unreachable("Huge Stack Frame not yet supported on z/OS");
+ }
+
+ if (ZFI->getSpillGPRRegs().LowGPR) {
+ // Skip over the GPR saves.
+ if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
+ const int Operand = 3;
+ // Now we can set the offset for the operation, since now the Stack
+ // has been finalized.
+ Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
+ // Maximum displacement for STMG instruction.
+ if (isInt<20>(Offset - StackSize))
+ Offset -= StackSize;
+ else
+ StoreInstr = &*MBBI;
+ MBBI->getOperand(Operand).setImm(Offset);
+ ++MBBI;
+ } else
+ llvm_unreachable("Couldn't skip over GPR saves");
+ }
+
+ if (StackSize) {
+ MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
+ // Allocate StackSize bytes.
+ int64_t Delta = -int64_t(StackSize);
+
+ // In case the STM(G) instruction also stores SP (R4), but the displacement
+ // is too large, the SP register is manipulated first before storing,
+ // resulting in the wrong value stored and retrieved later. In this case, we
+ // need to temporarily save the value of SP, and store it later to memory.
+ if (StoreInstr && HasFP) {
+ // Insert LR r0,r4 before STMG instruction.
+ BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R0D, RegState::Define)
+ .addReg(SystemZ::R4D);
+ // Insert ST r0,xxx(,r4) after STMG instruction.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG), SystemZ::R0D)
+ .addReg(SystemZ::R4D)
+ .addImm(Offset)
+ .addReg(0);
+ }
+
+ emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
+ ZII);
+ }
+
+ if (HasFP) {
+ // Copy the base of the frame to Frame Pointer Register.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR),
+ Regs.getFramePointerRegister())
+ .addReg(Regs.getStackPointerRegister());
+
+ // Mark the FramePtr as live at the beginning of every block except
+ // the entry block. (We'll have marked R8 as live on entry when
+ // saving the GPRs.)
+ for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
+ I->addLiveIn(Regs.getFramePointerRegister());
+ }
+}
void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
+ MachineBasicBlock &MBB) const {
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Skip the return instruction.
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
+
+ uint64_t StackSize = MFFrame.getStackSize();
+ if (StackSize) {
+ unsigned SPReg = Regs.getStackPointerRegister();
+ if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ emitIncrement(MBB, MBBI, DL, SPReg, StackSize, ZII);
+ }
+ }
+}
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
- return false;
+ return (MF.getFrameInfo().hasVarSizedObjects());
+}
+
+void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Setup stack frame offset
+ MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index af219da79c328..106b9e8ebe065 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -115,11 +115,20 @@ class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBII,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index a5e6c906453f3..24de528507713 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1500,8 +1500,16 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
assert(VA.isMemLoc() && "Argument not register or memory");
// Create the frame index object for this incoming parameter.
- int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
+ // FIXME: Pre-include call frame size in the offset, should not
+ // need to manually add it here.
+ int64_t ArgSPOffset = VA.getLocMemOffset();
+ if (Subtarget.isTargetXPLINK64()) {
+ auto &XPRegs =
+ Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ ArgSPOffset += XPRegs.getCallFrameSize();
+ }
+ int FI =
+ MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-01.ll b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
index 7194d09cba16d..1d1a4ec8a4156 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-01.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
@@ -86,7 +86,7 @@ entry:
}
; CHECK-LABEL: pass_integrals0:
-; CHECK: ag 2, -{{[0-9]+}}(4)
+; CHECK: ag 2, 2328(4)
; CHECK-NEXT: lgr 3, 2
define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
entry:
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
index 8d6b93387330f..3571346c23698 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
@@ -14,7 +14,7 @@ entry:
; CHECK: vaf 1, 1, 27
; CHECK: vaf 1, 1, 28
; CHECK: vaf 1, 1, 29
-; CHECK: vl 0, 32(4), 4
+; CHECK: vl 0, 2432(4), 4
; CHECK: vaf 1, 1, 30
; CHECK: vaf 1, 1, 31
; CHECK: vaf 24, 1, 0
diff --git a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
index 4934fee8410cc..d4196ac7835e2 100644
--- a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
+++ b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -6,7 +6,14 @@
; Small stack frame.
; CHECK-LABEL: func0
-; CHECK64: stmg 6, 7
+; CHECK64: stmg 6, 7, 1872(4)
+; stmg instruction's displacement field must be 2064-dsa_size
+; as per ABI
+; CHECK64: aghi 4, -192
+
+; CHECK64: lg 7, 2072(4)
+; CHECK64: aghi 4, 192
+; CHECK64: b 2(7)
define void @func0() {
call i64 (i64) @fun(i64 10)
ret void
@@ -14,7 +21,12 @@ define void @func0() {
; Spill all GPR CSRs
; CHECK-LABEL: func1
-; CHECK64: stmg 6, 15
+; CHECK64: stmg 6, 15, 1904(4)
+; CHECK64: aghi 4, -160
+
+; CHECK64: lmg 7, 15, 2072(4)
+; CHECK64: aghi 4, 160
+; CHECK64: b 2(7)
define void @func1(i64 *%ptr) {
%l01 = load volatile i64, i64 *%ptr
%l02 = load volatile i64, i64 *%ptr
@@ -67,6 +79,8 @@ define void @func1(i64 *%ptr) {
; Spill all FPRs and VRs
; CHECK-LABEL: func2
+; CHECK64: stmg 6, 7, 1744(4)
+; CHECK64: aghi 4, -320
; CHECK64: std 15, {{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 14, {{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 13, {{[0-9]+}}(4) * 8-byte Folded Spill
@@ -83,6 +97,27 @@ define void @func1(i64 *%ptr) {
; CHECK64: vst 18, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
; CHECK64: vst 17, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
; CHECK64: vst 16, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
+
+; CHECK64: ld 15, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 14, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 13, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 12, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 11, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 10, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 9, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: ld 8, {{[0-9]+}}(4) * 8-byte Folded Reload
+; CHECK64: vl 23, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 22, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 21, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 20, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 19, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 18, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 17, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: vl 16, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
+; CHECK64: lg 7, 2072(4)
+; CHECK64: aghi 4, 320
+; CHECK64: b 2(7)
+
define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
%l00 = load volatile double, double *%ptr
%l01 = load volatile double, double *%ptr
@@ -232,5 +267,43 @@ define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
ret void
}
-declare i64 @fun(i64 %arg0)
+; Big stack frame, force the use of agfi before stmg
+; despite not requiring stack extension routine.
+; CHECK64: agfi 4, -1040768
+; CHECK64: stmg 6, 7, 2064(4)
+; CHECK64: agfi 4, 1040768
+define void @func3() {
+ %arr = alloca [130070 x i64], align 8
+ %ptr = bitcast [130070 x i64]* %arr to i8*
+ call i64 (i8*) @fun1(i8* %ptr)
+ ret void
+}
+
+; Requires the saving of r4 due to variable sized
+; object in stack frame. (Eg: VLA)
+; CHECK64: stmg 4, 8, 1856(4)
+; CHECK64: aghi 4, -192
+; CHECK64: lmg 4, 8, 2048(4)
+define i64 @func4(i64 %n) {
+ %vla = alloca i64, i64 %n, align 8
+ %call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* nonnull %vla)
+ ret i64 %call
+}
+; Require saving of r4 and in addition, a displacement large enough
+; to force use of agfi before stmg.
+; CHECK64: lgr 0, 4
+; CHECK64: agfi 4, -1040192
+; CHECK64: stmg 4, 8, 2048(4)
+; CHECK64: lmg 4, 8, 2048(4)
+define i64 @func5(i64 %n) {
+ %vla = alloca i64, i64 %n, align 8
+ %arr = alloca [130000 x i64], align 8
+ %ptr = bitcast [130000 x i64]* %arr to i64*
+ %call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* %ptr)
+ ret i64 %call
+}
+
+declare i64 @fun(i64 %arg0)
+declare i64 @fun1(i8* %ptr)
+declare i64 @fun2(i64 %n, i64* %arr0, i64* %arr1)
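As a further illustration, when the frame contains a variable-sized
object the new hasFP() path is taken and r4 itself is spilled and
restored; the sketch below mirrors the func4 case added above, with the
offsets taken from its CHECK64 lines:

  define i64 @func4(i64 %n) {
    %vla = alloca i64, i64 %n, align 8
    %call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* nonnull %vla)
    ret i64 %call
  }
  declare i64 @fun2(i64 %n, i64* %arr0, i64* %arr1)

  ; Prologue: spill r4-r8 before allocating the frame.
  ;   stmg 4, 8, 1856(4)
  ;   aghi 4, -192
  ; Epilogue: because r4 (the stack pointer) is in the restore range,
  ; the single lmg also restores the caller's stack pointer, so no
  ; explicit deallocation (aghi) is emitted.
  ;   lmg  4, 8, 2048(4)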