[clang] 515bfc6 - [SystemZ] Implement -fstack-clash-protection
Jonas Paulsson via cfe-commits
cfe-commits@lists.llvm.org
Sat Jun 6 09:39:03 PDT 2020
Author: Jonas Paulsson
Date: 2020-06-06T18:38:36+02:00
New Revision: 515bfc66eaced830c03b2ec187bef0d8c4dc6915
URL: https://github.com/llvm/llvm-project/commit/515bfc66eaced830c03b2ec187bef0d8c4dc6915
DIFF: https://github.com/llvm/llvm-project/commit/515bfc66eaced830c03b2ec187bef0d8c4dc6915.diff
LOG: [SystemZ] Implement -fstack-clash-protection
When this option is passed, the allocated stack space is now probed as it is
allocated, in order to protect against the stack clash attack (see
https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt).
Review: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D78717
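To illustrate the new option, here is a minimal, hypothetical usage sketch (not part of the patch; the function name and buffer size are made up for illustration). On s390x-linux the driver now forwards -fstack-clash-protection, which marks each function with the "probe-stack"="inline-asm" IR attribute; the SystemZ backend then allocates and probes the frame in blocks (4096 bytes by default, adjustable via the "stack-probe-size" function attribute):

/* Hypothetical example, built with something like:
 *   clang --target=s390x-linux-gnu -O2 -fstack-clash-protection -S big_frame.c
 * The frame is much larger than the default 4096-byte probe size, so the
 * emitted prologue allocates and probes it in steps (see the
 * stack-clash-protection.ll tests below for the exact sequences). */
int big_frame(void) {
  volatile int buf[18000];    /* roughly 72000 bytes of stack */
  buf[0] = 1;                 /* touch the low end */
  buf[17999] = 2;             /* and the high end */
  return buf[0] + buf[17999];
}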
Added:
clang/test/Driver/stack-clash-protection-02.c
llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
Modified:
clang/docs/ReleaseNotes.rst
clang/lib/Basic/Targets/SystemZ.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/CodeGen/stack-clash-protection.c
llvm/include/llvm/ADT/Triple.h
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
llvm/lib/Target/SystemZ/SystemZFrameLowering.h
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/lib/Target/SystemZ/SystemZISelLowering.h
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
llvm/lib/Target/SystemZ/SystemZInstrInfo.h
llvm/lib/Target/SystemZ/SystemZInstrInfo.td
llvm/lib/Target/SystemZ/SystemZOperators.td
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 25ff809120de..15e6d35117b4 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -94,8 +94,8 @@ New Compiler Flags
------------------
- -fstack-clash-protection will provide a protection against the stack clash
- attack for x86 architecture through automatic probing of each page of
- allocated stack.
+ attack for x86 and s390x architectures through automatic probing of each page
+ of allocated stack.
- -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify
the floating-point exception behavior. The default setting is ``ignore``.
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 1c8822e2bc2d..134b0313b86a 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -64,6 +64,10 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
+ bool isSPRegName(StringRef RegName) const override {
+ return RegName.equals("r15");
+ }
+
bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const override;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b20048768e44..513f32caad8a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2997,7 +2997,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
if (!EffectiveTriple.isOSLinux())
return;
- if (!EffectiveTriple.isX86())
+ if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ())
return;
if (Args.hasFlag(options::OPT_fstack_clash_protection,
diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c
index f970bf909cbe..eb48da8ff9e9 100644
--- a/clang/test/CodeGen/stack-clash-protection.c
+++ b/clang/test/CodeGen/stack-clash-protection.c
@@ -1,5 +1,6 @@
// Check the correct function attributes are generated
// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
// CHECK: define void @large_stack() #[[A:.*]] {
void large_stack() {
diff --git a/clang/test/Driver/stack-clash-protection-02.c b/clang/test/Driver/stack-clash-protection-02.c
new file mode 100644
index 000000000000..25ff3b5d6940
--- /dev/null
+++ b/clang/test/Driver/stack-clash-protection-02.c
@@ -0,0 +1,13 @@
+// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SystemZ
+// SystemZ: "-fstack-clash-protection"
+// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -S -emit-llvm -o %t.ll %s 2>&1 | FileCheck %s -check-prefix=SystemZ-warn
+// SystemZ-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
+
+int foo(int c) {
+ int r;
+ __asm__("ag %%r15, %0"
+ :
+ : "rm"(c)
+ : "r15");
+ return r;
+}
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index fa437a57520a..8e46265c7f71 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -739,6 +739,11 @@ class Triple {
return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
}
+ /// Tests whether the target is SystemZ.
+ bool isSystemZ() const {
+ return getArch() == Triple::systemz;
+ }
+
/// Tests whether the target is x86 (32- or 64-bit).
bool isX86() const {
return getArch() == Triple::x86 || getArch() == Triple::x86_64;
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 330c21398265..f86725a5fde8 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -374,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
}
+// Add CFI for the new CFA offset.
+static void buildCFAOffs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int Offset,
+ const SystemZInstrInfo *ZII) {
+ unsigned CFIIndex = MBB.getParent()->addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
+// Add CFI for the new frame location.
+static void buildDefCFAReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned Reg,
+ const SystemZInstrInfo *ZII) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+ const SystemZTargetLowering &TLI = *STI.getTargetLowering();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- auto *ZII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
@@ -462,13 +489,22 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
- emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
-
- // Add CFI for the allocation.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPOffsetFromCFA - Delta));
- BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
+ (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
+ if (!FreeProbe &&
+ MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
+ // Stack probing may involve looping, but splitting the prologue block
+ // is not possible at this point since it would invalidate the
+ // SaveBlocks / RestoreBlocks sets of PEI in the single block function
+ // case. Build a pseudo to be handled later by inlineStackProbe().
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
+ .addImm(StackSize);
+ }
+ else {
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+ buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+ }
SPOffsetFromCFA += Delta;
if (StoreBackchain) {
@@ -486,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SystemZ::R15D);
// Add CFI for the new frame location.
- unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
- BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
@@ -583,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+ const SystemZTargetLowering &TLI = *STI.getTargetLowering();
+
+ MachineInstr *StackAllocMI = nullptr;
+ for (MachineInstr &MI : PrologMBB)
+ if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
+ StackAllocMI = &MI;
+ break;
+ }
+ if (StackAllocMI == nullptr)
+ return;
+ uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
+ const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ uint64_t NumFullBlocks = StackSize / ProbeSize;
+ uint64_t Residual = StackSize % ProbeSize;
+ int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+ MachineBasicBlock *MBB = &PrologMBB;
+ MachineBasicBlock::iterator MBBI = StackAllocMI;
+ const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+ // Allocate a block of Size bytes on the stack and probe it.
+ auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
+ MachineBasicBlock::iterator InsPt, unsigned Size,
+ bool EmitCFI) -> void {
+ emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
+ if (EmitCFI) {
+ SPOffsetFromCFA -= Size;
+ buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
+ }
+ // Probe by means of a volatile compare.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+ BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
+ .addReg(SystemZ::R0D, RegState::Undef)
+ .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
+ .addMemOperand(MMO);
+ };
+
+ if (NumFullBlocks < 3) {
+ // Emit unrolled probe statements.
+ for (unsigned int i = 0; i < NumFullBlocks; i++)
+ allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
+ } else {
+ // Emit a loop probing the pages.
+ uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
+ SPOffsetFromCFA -= LoopAlloc;
+
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+ .addReg(SystemZ::R15D);
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
+ emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+ buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
+ ZII);
+
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+ MBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+
+ MBB = LoopMBB;
+ allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
+ BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
+ .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+ BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
+
+ MBB = DoneMBB;
+ MBBI = DoneMBB->begin();
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
+
+ recomputeLiveIns(*DoneMBB);
+ recomputeLiveIns(*LoopMBB);
+ }
+
+ if (Residual)
+ allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+
+ StackAllocMI->eraseFromParent();
+}
+
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo().hasVarSizedObjects() ||
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index b23f88f7de1f..8752acc7e5ae 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -43,6 +43,8 @@ class SystemZFrameLowering : public TargetFrameLowering {
RegScavenger *RS) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 59896d628816..a753f2a14a35 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -826,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
+/// Returns true if stack probing through inline assembly is requested.
+bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
@@ -3428,10 +3437,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
-
- // Copy the new stack pointer back.
- Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ SDValue NewSP;
+ if (hasInlineStackProbe(MF)) {
+ NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
+ DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
+ Chain = NewSP.getValue(1);
+ }
+ else {
+ NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+ // Copy the new stack pointer back.
+ Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ }
// The allocated data lives above the 160 bytes allocated for the standard
// frame, plus any outgoing stack arguments. We don't know how much that
@@ -5400,6 +5416,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
+ OPCODE(PROBED_ALLOCA);
OPCODE(POPCNT);
OPCODE(SMUL_LOHI);
OPCODE(UMUL_LOHI);
@@ -6825,38 +6842,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+unsigned
+SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//
-// Create a new basic block after MBB.
-static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
- MachineFunction &MF = *MBB->getParent();
- MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
- MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
- return NewMBB;
-}
-
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
-// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
@@ -7027,8 +7035,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
bool CCKilled =
(LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the last Select instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -7121,8 +7129,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the CondStore instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -7205,8 +7213,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// ...
@@ -7323,10 +7331,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
- MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
+ MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
// StartMBB:
// ...
@@ -7434,9 +7442,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
// StartMBB:
// ...
@@ -7596,7 +7604,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
- splitBlockAfter(MI, MBB) : nullptr);
+ SystemZ::splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
@@ -7620,9 +7628,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register NextCountReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB =
+ (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
// StartMBB:
// # fall through to LoopMMB
@@ -7738,7 +7747,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// If there's another CLC to go, branch to the end if a difference
// was found.
if (EndMBB && Length > 0) {
- MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
.addMBB(EndMBB);
@@ -7778,8 +7787,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// # fall through to LoopMMB
@@ -7890,6 +7899,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
return MBB;
}
+MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
+ MachineInstr &MI, MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI.getDebugLoc();
+ const unsigned ProbeSize = getStackProbeSize(MF);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SizeReg = MI.getOperand(2).getReg();
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
+ MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
+ MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
+
+ MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+
+ Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+
+ // LoopTestMBB
+ // BRC TailTestMBB
+ // # fallthrough to LoopBodyMBB
+ StartMBB->addSuccessor(LoopTestMBB);
+ MBB = LoopTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
+ .addReg(SizeReg)
+ .addMBB(StartMBB)
+ .addReg(IncReg)
+ .addMBB(LoopBodyMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
+ .addMBB(TailTestMBB);
+ MBB->addSuccessor(LoopBodyMBB);
+ MBB->addSuccessor(TailTestMBB);
+
+ // LoopBodyMBB: Allocate and probe by means of a volatile compare.
+ // J LoopTestMBB
+ MBB = LoopBodyMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
+ .setMemRefs(VolLdMMO);
+ BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
+ MBB->addSuccessor(LoopTestMBB);
+
+ // TailTestMBB
+ // BRC DoneMBB
+ // # fallthrough to TailMBB
+ MBB = TailTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(PHIReg)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+ .addMBB(DoneMBB);
+ MBB->addSuccessor(TailMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ // TailMBB
+ // # fallthrough to DoneMBB
+ MBB = TailMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addReg(PHIReg);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
+ .setMemRefs(VolLdMMO);
+ MBB->addSuccessor(DoneMBB);
+
+ // DoneMBB
+ MBB = DoneMBB;
+ BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(SystemZ::R15D);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
@@ -8150,6 +8250,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::LTXBRCompare_VecPseudo:
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+ case SystemZ::PROBED_ALLOCA:
+ return emitProbedAlloca(MI, MBB);
+
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, MBB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index dd6098d7bb94..e60deaedbdfb 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -83,6 +83,10 @@ enum NodeType : unsigned {
// base of the dynamically-allocatable area.
ADJDYNALLOC,
+ // For allocating stack space when using stack clash protector.
+ // Allocation is performed by block, and each block is probed.
+ PROBED_ALLOCA,
+
// Count number of bits set in operand 0 per byte.
POPCNT,
@@ -428,6 +432,7 @@ class SystemZTargetLowering : public TargetLowering {
EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -556,6 +561,8 @@ class SystemZTargetLowering : public TargetLowering {
return true;
}
+ unsigned getStackProbeSize(MachineFunction &MF) const;
+
private:
const SystemZSubtarget &Subtarget;
@@ -691,6 +698,8 @@ class SystemZTargetLowering : public TargetLowering {
MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned Opcode) const;
+ MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
MachineMemOperand::Flags
getTargetMMOFlags(const Instruction &I) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index be791bd7acf1..223cfcba2fac 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1872,6 +1872,30 @@ unsigned SystemZ::reverseCCMask(unsigned CCMask) {
(CCMask & SystemZ::CCMASK_CMP_UO));
}
+MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
+ return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
if (!STI.hasLoadAndTrap())
return 0;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 2247f9f3fdbf..72dafc3c93c2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -159,6 +159,16 @@ int getTargetMemOpcode(uint16_t Opcode);
// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
unsigned reverseCCMask(unsigned CCMask);
+
+// Create a new basic block after MBB.
+MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB);
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB);
+// Split MBB before MI and return the new block (the one that contains MI).
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB);
}
class SystemZInstrInfo : public SystemZGenInstrInfo {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 5e153f0d1279..d5d56ecf6e47 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
[(set GR64:$dst, dynalloc12only:$src)]>;
+let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+ usesCustomInserter = 1 in
+ def PROBED_ALLOCA : Pseudo<(outs GR64:$dst),
+ (ins GR64:$oldSP, GR64:$space),
+ [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>;
+
+let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+ hasSideEffects = 1 in
+ def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
//===----------------------------------------------------------------------===//
// Branch instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 852c6282be6e..81af5fd854db 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -40,6 +40,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZProbedAlloca : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisInt<1>,
@@ -269,6 +273,8 @@ def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK",
SDT_ZSelectCCMask>;
def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca,
+ [SDNPHasChain]>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
new file mode 100644
index 000000000000..748f441a9219
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+define i32 @fun0(i32 %n) #0 {
+; CHECK-LABEL: fun0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT: .cfi_offset %r11, -72
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: lgr %r11, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r11
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
+; CHECK-NEXT: la %r0, 7(%r1)
+; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
+; CHECK-NEXT: clgfi %r1, 4096
+; CHECK-NEXT: jl .LBB0_2
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slgfi %r1, 4096
+; CHECK-NEXT: slgfi %r15, 4096
+; CHECK-NEXT: cg %r15, 4088(%r15)
+; CHECK-NEXT: clgfi %r1, 4096
+; CHECK-NEXT: jhe .LBB0_1
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: cgije %r1, 0, .LBB0_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: slgr %r15, %r1
+; CHECK-NEXT: cg %r15, -8(%r1,%r15)
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: la %r1, 160(%r15)
+; CHECK-NEXT: lhi %r0, 1
+; CHECK-NEXT: sty %r0, 4792(%r1)
+; CHECK-NEXT: l %r2, 0(%r1)
+; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT: br %r14
+
+ %a = alloca i32, i32 %n
+ %b = getelementptr inbounds i32, i32* %a, i64 1198
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+; Probe size should be modulo stack alignment.
+define i32 @fun1(i32 %n) #0 "stack-probe-size"="1250" {
+; CHECK-LABEL: fun1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT: .cfi_offset %r11, -72
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: lgr %r11, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r11
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
+; CHECK-NEXT: la %r0, 7(%r1)
+; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
+; CHECK-NEXT: clgfi %r1, 1248
+; CHECK-NEXT: jl .LBB1_2
+; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slgfi %r1, 1248
+; CHECK-NEXT: slgfi %r15, 1248
+; CHECK-NEXT: cg %r15, 1240(%r15)
+; CHECK-NEXT: clgfi %r1, 1248
+; CHECK-NEXT: jhe .LBB1_1
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: cgije %r1, 0, .LBB1_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: slgr %r15, %r1
+; CHECK-NEXT: cg %r15, -8(%r1,%r15)
+; CHECK-NEXT: .LBB1_4:
+; CHECK-NEXT: la %r1, 160(%r15)
+; CHECK-NEXT: lhi %r0, 1
+; CHECK-NEXT: sty %r0, 4792(%r1)
+; CHECK-NEXT: l %r2, 0(%r1)
+; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT: br %r14
+ %a = alloca i32, i32 %n
+ %b = getelementptr inbounds i32, i32* %a, i64 1198
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+; The minimum probe size is the stack alignment.
+define i32 @fun2(i32 %n) #0 "stack-probe-size"="4" {
+; CHECK-LABEL: fun2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT: .cfi_offset %r11, -72
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r1
+; CHECK-NEXT: aghi %r1, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -8
+; CHECK-NEXT: cg %r0, 0(%r15)
+; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: lgr %r11, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r11
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
+; CHECK-NEXT: la %r0, 7(%r1)
+; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
+; CHECK-NEXT: clgijl %r1, 8, .LBB2_4
+; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slgfi %r1, 8
+; CHECK-NEXT: slgfi %r15, 8
+; CHECK-NEXT: cg %r15, 0(%r15)
+; CHECK-NEXT: clgijhe %r1, 8, .LBB2_3
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: cgije %r1, 0, .LBB2_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: slgr %r15, %r1
+; CHECK-NEXT: cg %r15, -8(%r1,%r15)
+; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT: la %r1, 160(%r15)
+; CHECK-NEXT: lhi %r0, 1
+; CHECK-NEXT: sty %r0, 4792(%r1)
+; CHECK-NEXT: l %r2, 0(%r1)
+; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT: br %r14
+ %a = alloca i32, i32 %n
+ %b = getelementptr inbounds i32, i32* %a, i64 1198
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
new file mode 100644
index 000000000000..8166af3673cd
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
@@ -0,0 +1,242 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
+;
+; Test stack clash protection probing for static allocas.
+
+; Small: one probe.
+define i32 @fun0() #0 {
+; CHECK-LABEL: fun0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: aghi %r15, -560
+; CHECK-NEXT: .cfi_def_cfa_offset 720
+; CHECK-NEXT: cg %r0, 552(%r15)
+; CHECK-NEXT: mvhi 552(%r15), 1
+; CHECK-NEXT: l %r2, 160(%r15)
+; CHECK-NEXT: aghi %r15, 560
+; CHECK-NEXT: br %r14
+
+ %a = alloca i32, i64 100
+ %b = getelementptr inbounds i32, i32* %a, i64 98
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+; Medium: two probes.
+define i32 @fun1() #0 {
+; CHECK-LABEL: fun1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: aghi %r15, -4096
+; CHECK-NEXT: .cfi_def_cfa_offset 4256
+; CHECK-NEXT: cg %r0, 4088(%r15)
+; CHECK-NEXT: aghi %r15, -4080
+; CHECK-NEXT: .cfi_def_cfa_offset 8336
+; CHECK-NEXT: cg %r0, 4072(%r15)
+; CHECK-NEXT: mvhi 976(%r15), 1
+; CHECK-NEXT: l %r2, 176(%r15)
+; CHECK-NEXT: aghi %r15, 8176
+; CHECK-NEXT: br %r14
+
+ %a = alloca i32, i64 2000
+ %b = getelementptr inbounds i32, i32* %a, i64 200
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+; Large: Use a loop to allocate and probe in steps.
+define i32 @fun2() #0 {
+; CHECK-LABEL: fun2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r1
+; CHECK-NEXT: agfi %r1, -69632
+; CHECK-NEXT: .cfi_def_cfa_offset 69792
+; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -4096
+; CHECK-NEXT: cg %r0, 4088(%r15)
+; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: aghi %r15, -2544
+; CHECK-NEXT: .cfi_def_cfa_offset 72336
+; CHECK-NEXT: cg %r0, 2536(%r15)
+; CHECK-NEXT: lhi %r0, 1
+; CHECK-NEXT: mvhi 568(%r15), 1
+; CHECK-NEXT: sty %r0, 28968(%r15)
+; CHECK-NEXT: l %r2, 176(%r15)
+; CHECK-NEXT: agfi %r15, 72176
+; CHECK-NEXT: br %r14
+
+ %a = alloca i32, i64 18000
+ %b0 = getelementptr inbounds i32, i32* %a, i64 98
+ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
+ store volatile i32 1, i32* %b0
+ store volatile i32 1, i32* %b1
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+; Ends evenly on the step so no remainder needed.
+define void @fun3() #0 {
+; CHECK-LABEL: fun3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r1
+; CHECK-NEXT: aghi %r1, -28672
+; CHECK-NEXT: .cfi_def_cfa_offset 28832
+; CHECK-NEXT: .LBB3_1: # %entry
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -4096
+; CHECK-NEXT: cg %r0, 4088(%r15)
+; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: mvhi 180(%r15), 0
+; CHECK-NEXT: l %r0, 180(%r15)
+; CHECK-NEXT: aghi %r15, 28672
+; CHECK-NEXT: br %r14
+entry:
+ %stack = alloca [7122 x i32], align 4
+ %i = alloca i32, align 4
+ %0 = bitcast [7122 x i32]* %stack to i8*
+ %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+ store volatile i32 0, i32* %i, align 4
+ %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+ ret void
+}
+
+; Loop with bigger step.
+define void @fun4() #0 "stack-probe-size"="8192" {
+; CHECK-LABEL: fun4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r1
+; CHECK-NEXT: aghi %r1, -24576
+; CHECK-NEXT: .cfi_def_cfa_offset 24736
+; CHECK-NEXT: .LBB4_1: # %entry
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -8192
+; CHECK-NEXT: cg %r0, 8184(%r15)
+; CHECK-NEXT: clgrjh %r15, %r1, .LBB4_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: aghi %r15, -7608
+; CHECK-NEXT: .cfi_def_cfa_offset 32344
+; CHECK-NEXT: cg %r0, 7600(%r15)
+; CHECK-NEXT: mvhi 180(%r15), 0
+; CHECK-NEXT: l %r0, 180(%r15)
+; CHECK-NEXT: aghi %r15, 32184
+; CHECK-NEXT: br %r14
+entry:
+ %stack = alloca [8000 x i32], align 4
+ %i = alloca i32, align 4
+ %0 = bitcast [8000 x i32]* %stack to i8*
+ %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+ store volatile i32 0, i32* %i, align 4
+ %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+ ret void
+}
+
+; Probe size should be modulo stack alignment.
+define void @fun5() #0 "stack-probe-size"="4100" {
+; CHECK-LABEL: fun5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: aghi %r15, -4096
+; CHECK-NEXT: .cfi_def_cfa_offset 4256
+; CHECK-NEXT: cg %r0, 4088(%r15)
+; CHECK-NEXT: aghi %r15, -88
+; CHECK-NEXT: .cfi_def_cfa_offset 4344
+; CHECK-NEXT: cg %r0, 80(%r15)
+; CHECK-NEXT: mvhi 180(%r15), 0
+; CHECK-NEXT: l %r0, 180(%r15)
+; CHECK-NEXT: aghi %r15, 4184
+; CHECK-NEXT: br %r14
+entry:
+ %stack = alloca [1000 x i32], align 4
+ %i = alloca i32, align 4
+ %0 = bitcast [1000 x i32]* %stack to i8*
+ %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+ store volatile i32 0, i32* %i, align 4
+ %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+ ret void
+}
+
+; The minimum probe size is the stack alignment.
+define void @fun6() #0 "stack-probe-size"="5" {
+; CHECK-LABEL: fun6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r1
+; CHECK-NEXT: aghi %r1, -4184
+; CHECK-NEXT: .cfi_def_cfa_offset 4344
+; CHECK-NEXT: .LBB6_1: # %entry
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -8
+; CHECK-NEXT: cg %r0, 0(%r15)
+; CHECK-NEXT: clgrjh %r15, %r1, .LBB6_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: mvhi 180(%r15), 0
+; CHECK-NEXT: l %r0, 180(%r15)
+; CHECK-NEXT: aghi %r15, 4184
+; CHECK-NEXT: br %r14
+entry:
+ %stack = alloca [1000 x i32], align 4
+ %i = alloca i32, align 4
+ %0 = bitcast [1000 x i32]* %stack to i8*
+ %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+ store volatile i32 0, i32* %i, align 4
+ %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+ ret void
+}
+
+; Small with a natural probe (STMG) - needs no extra probe.
+define i32 @fun7() #0 {
+; CHECK-LABEL: fun7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -3976
+; CHECK-NEXT: .cfi_def_cfa_offset 4136
+; CHECK-NEXT: brasl %r14, foo@PLT
+; CHECK-NEXT: st %r2, 568(%r15)
+; CHECK-NEXT: l %r2, 176(%r15)
+; CHECK-NEXT: lmg %r14, %r15, 4088(%r15)
+; CHECK-NEXT: br %r14
+ %v = call i32 @foo()
+ %a = alloca i32, i64 950
+ %b = getelementptr inbounds i32, i32* %a, i64 98
+ store volatile i32 %v, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+; Medium with an STMG - still needs probing.
+define i32 @fun8() #0 {
+; CHECK-LABEL: fun8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -3984
+; CHECK-NEXT: .cfi_def_cfa_offset 4144
+; CHECK-NEXT: cg %r0, 3976(%r15)
+; CHECK-NEXT: brasl %r14, foo@PLT
+; CHECK-NEXT: st %r2, 976(%r15)
+; CHECK-NEXT: l %r2, 176(%r15)
+; CHECK-NEXT: lmg %r14, %r15, 4096(%r15)
+; CHECK-NEXT: br %r14
+
+ %v = call i32 @foo()
+ %a = alloca i32, i64 952
+ %b = getelementptr inbounds i32, i32* %a, i64 200
+ store volatile i32 %v, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+declare i32 @foo()
+attributes #0 = { "probe-stack"="inline-asm" }
+