[llvm] eb3e09c - [SystemZ] [z/OS] Add support for generating huge (1 MiB) stack frames in XPLINK64
Neumann Hon via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 24 23:38:41 PST 2022
Author: Neumann Hon
Date: 2022-02-25T02:37:08-05:00
New Revision: eb3e09c9bf1d8808acf7d21f40ab0103121a0d60
URL: https://github.com/llvm/llvm-project/commit/eb3e09c9bf1d8808acf7d21f40ab0103121a0d60
DIFF: https://github.com/llvm/llvm-project/commit/eb3e09c9bf1d8808acf7d21f40ab0103121a0d60.diff
LOG: [SystemZ] [z/OS] Add support for generating huge (1 MiB) stack frames in XPLINK64
This patch extends support for generating huge stack frames on 64-bit XPLINK by implementing the ABI-mandated call to the stack extension routine.
Reviewed By: uweigand
Differential Revision: https://reviews.llvm.org/D120450
Added:
Modified:
llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
llvm/lib/Target/SystemZ/SystemZFrameLowering.h
llvm/lib/Target/SystemZ/SystemZInstrInfo.td
llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 46538f966cd90..75f19e01e9b27 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -262,6 +262,13 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
emitCallInformation(CallType::BASR76);
return;
+ case SystemZ::CallBASR_STACKEXT:
+ EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR)
+ .addReg(SystemZ::R3D)
+ .addReg(MI->getOperand(0).getReg()));
+ emitCallInformation(CallType::BASR33);
+ return;
+
case SystemZ::CallBRASL:
LoweredMI = MCInstBuilder(SystemZ::BRASL)
.addReg(SystemZ::R14D)
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 610627e7e3f08..b22e1b20b039b 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -1153,12 +1153,6 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
uint64_t StackSize = MFFrame.getStackSize();
- // FIXME: Implement support for large stack sizes, when the stack extension
- // routine needs to be called.
- if (StackSize > 1024 * 1024) {
- llvm_unreachable("Huge Stack Frame not yet supported on z/OS");
- }
-
if (ZFI->getSpillGPRRegs().LowGPR) {
// Skip over the GPR saves.
if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
@@ -1201,6 +1195,18 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
ZII);
+
+ // If the requested stack size is larger than the guard page, the prologue
+ // must check whether the stack extender has to be called. This requires
+ // adding a conditional branch, but splitting the prologue block is not
+ // possible at this point since it would invalidate the SaveBlocks /
+ // RestoreBlocks sets of PEI in the single block function case. Build a
+ // pseudo to be handled later by inlineStackProbe().
+ const uint64_t GuardPageSize = 1024 * 1024;
+ if (StackSize > GuardPageSize) {
+ assert(StoreInstr && "Wrong insertion point");
+ BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::XPLINK_STACKALLOC));
+ }
}
if (HasFP) {
@@ -1239,6 +1245,74 @@ void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+// Expand the XPLINK_STACKALLOC pseudo, if any: compare the stack pointer with
+// the stack floor, and call the LE stack extender if it is below the floor.
+void SystemZXPLINKFrameLowering::inlineStackProbe(
+ MachineFunction &MF, MachineBasicBlock &PrologMBB) const {
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ MachineInstr *StackAllocMI = nullptr;
+ for (MachineInstr &MI : PrologMBB)
+ if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) {
+ StackAllocMI = &MI;
+ break;
+ }
+ if (StackAllocMI == nullptr)
+ return;
+
+ MachineBasicBlock &MBB = PrologMBB;
+ const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+ // The second half of MBB once it is split before the pseudo (set below).
+ MachineBasicBlock *NextMBB;
+
+ // Add a new basic block for the call to the stack extension routine.
+ MachineBasicBlock *StackExtMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.push_back(StackExtMBB);
+
+ // LG r3,72(,r3)
+ BuildMI(StackExtMBB, DL, ZII->get(SystemZ::LG), SystemZ::R3D)
+ .addReg(SystemZ::R3D)
+ .addImm(72)
+ .addReg(0);
+ // BASR r3,r3
+ BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT))
+ .addReg(SystemZ::R3D);
+
+ // LLGT r3,1208
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D)
+ .addReg(0)
+ .addImm(1208)
+ .addReg(0);
+ // CG r4,64(,r3)
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::CG))
+ .addReg(SystemZ::R4D)
+ .addReg(SystemZ::R3D)
+ .addImm(64)
+ .addReg(0);
+ // JLL b'0100',F'37' -- i.e. branch to StackExtMBB if the compare is low.
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_LT)
+ .addMBB(StackExtMBB);
+
+ NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB);
+ MBB.addSuccessor(NextMBB);
+ MBB.addSuccessor(StackExtMBB);
+
+ // Add the jump back from the stack extension BB to NextMBB.
+ BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB);
+ StackExtMBB->addSuccessor(NextMBB);
+
+ StackAllocMI->eraseFromParent();
+
+ // Compute the live-in lists for the new blocks.
+ recomputeLiveIns(*NextMBB);
+ recomputeLiveIns(*StackExtMBB);
+}
+
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getFrameInfo().hasVarSizedObjects());
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 2b3d7efed53b8..bec83a9457e04 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -127,6 +127,9 @@ class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
+
bool hasFP(const MachineFunction &MF) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index c47731b26e95d..ed7e3c02a10d0 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -42,6 +42,10 @@ let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
hasSideEffects = 1 in
def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
+let Defs = [R3D, CC], Uses = [R3D, R4D], hasNoSchedulingInfo = 1,
+ hasSideEffects = 1 in
+ def XPLINK_STACKALLOC : Pseudo<(outs), (ins), []>;
+
//===----------------------------------------------------------------------===//
// Branch instructions
//===----------------------------------------------------------------------===//
@@ -285,6 +289,10 @@ let Predicates = [IsTargetXPLINK64] in {
def CallBASR_XPLINK64 : Alias<4, (outs), (ins ADDR64:$R2, variable_ops),
[(z_call ADDR64:$R2)]>;
}
+
+ let isCall = 1, Defs = [R3D, CC], Uses = [FPC] in {
+ def CallBASR_STACKEXT : Alias<4, (outs), (ins ADDR64:$R2), []>;
+ }
}
// Regular calls.
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index ac92501470155..fd01a8a941c9d 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -168,7 +168,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index 683b66a6f1edc..3f406736a71ff 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -169,7 +169,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index 2ebdf508f22ba..6ae911c3f3ebe 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -169,7 +169,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 51c87c2380c0e..173cf960d2bd0 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -147,7 +147,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 8f2379ce052af..d2060471d65ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -152,7 +152,7 @@ def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, FXU2, VBU, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
diff --git a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
index 563896fb93fbe..c7d7cdf995ce7 100644
--- a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
+++ b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -312,6 +312,22 @@ define i64 @func5(i64 %n) {
ret i64 %call
}
+; CHECK-LABEL: large_stack
+; CHECK64: agfi 4, -1048768
+; CHECK64-NEXT: llgt 3, 1208
+; CHECK64-NEXT: cg 4, 64(3)
+; CHECK64-NEXT: jhe
+; CHECK64: * %bb.1:
+; CHECK64: lg 3, 72(3)
+; CHECK64: basr 3, 3
+; CHECK64: stmg 6, 7, 2064(4)
+define void @large_stack() {
+ %arr = alloca [131072 x i64], align 8
+ %ptr = bitcast [131072 x i64]* %arr to i8*
+ call i64 (i8*) @fun1(i8* %ptr)
+ ret void
+}
+
declare i64 @fun(i64 %arg0)
declare i64 @fun1(i8* %ptr)
declare i64 @fun2(i64 %n, i64* %arr0, i64* %arr1)
More information about the llvm-commits
mailing list