[clang] 515bfc6 - [SystemZ] Implement -fstack-clash-protection

Jonas Paulsson via cfe-commits <cfe-commits@lists.llvm.org>
Sat Jun 6 09:39:03 PDT 2020


Author: Jonas Paulsson
Date: 2020-06-06T18:38:36+02:00
New Revision: 515bfc66eaced830c03b2ec187bef0d8c4dc6915

URL: https://github.com/llvm/llvm-project/commit/515bfc66eaced830c03b2ec187bef0d8c4dc6915
DIFF: https://github.com/llvm/llvm-project/commit/515bfc66eaced830c03b2ec187bef0d8c4dc6915.diff

LOG: [SystemZ] Implement -fstack-clash-protection

When this option is passed, allocated stack space is probed as it is
allocated, one page at a time. The purpose is to protect against the stack
clash attack (see https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt).
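
As a usage illustration (not part of this commit), a function whose frame
spans several pages gets its stack allocation probed when built with the new
flag; the file name, function name and sizes below are made up:

  /* sketch.c -- any frame larger than the probe size (4096 bytes by
   * default) gets its stack allocation probed.
   *
   * Assumed invocation:
   *   clang --target=s390x-linux-gnu -fstack-clash-protection -O2 -S sketch.c
   */
  int big_frame(void) {
    volatile char buf[16384];              /* spans several 4 KiB pages */
    buf[0] = 1;
    buf[sizeof(buf) - 1] = 2;
    return buf[0] + buf[sizeof(buf) - 1];
  }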

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D78717

Added: 
    clang/test/Driver/stack-clash-protection-02.c
    llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
    llvm/test/CodeGen/SystemZ/stack-clash-protection.ll

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/lib/Basic/Targets/SystemZ.h
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/test/CodeGen/stack-clash-protection.c
    llvm/include/llvm/ADT/Triple.h
    llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
    llvm/lib/Target/SystemZ/SystemZFrameLowering.h
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/lib/Target/SystemZ/SystemZInstrInfo.h
    llvm/lib/Target/SystemZ/SystemZInstrInfo.td
    llvm/lib/Target/SystemZ/SystemZOperators.td

Removed: 
    


################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 25ff809120de..15e6d35117b4 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -94,8 +94,8 @@ New Compiler Flags
 ------------------
 
 - -fstack-clash-protection will provide a protection against the stack clash
-  attack for x86 architecture through automatic probing of each page of
-  allocated stack.
+  attack for x86 and s390x architectures through automatic probing of each page
+  of allocated stack.
 
 - -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify
   the floating-point exception behavior. The default setting is ``ignore``.

diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 1c8822e2bc2d..134b0313b86a 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -64,6 +64,10 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
 
   ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
 
+  bool isSPRegName(StringRef RegName) const override {
+    return RegName.equals("r15");
+  }
+
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &info) const override;
 

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b20048768e44..513f32caad8a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2997,7 +2997,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
   if (!EffectiveTriple.isOSLinux())
     return;
 
-  if (!EffectiveTriple.isX86())
+  if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ())
     return;
 
   if (Args.hasFlag(options::OPT_fstack_clash_protection,

diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c
index f970bf909cbe..eb48da8ff9e9 100644
--- a/clang/test/CodeGen/stack-clash-protection.c
+++ b/clang/test/CodeGen/stack-clash-protection.c
@@ -1,5 +1,6 @@
 // Check the correct function attributes are generated
 // RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
 
 // CHECK: define void @large_stack() #[[A:.*]] {
 void large_stack() {

diff --git a/clang/test/Driver/stack-clash-protection-02.c b/clang/test/Driver/stack-clash-protection-02.c
new file mode 100644
index 000000000000..25ff3b5d6940
--- /dev/null
+++ b/clang/test/Driver/stack-clash-protection-02.c
@@ -0,0 +1,13 @@
+// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SystemZ
+// SystemZ: "-fstack-clash-protection"
+// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -S -emit-llvm -o %t.ll %s 2>&1 | FileCheck %s -check-prefix=SystemZ-warn
+// SystemZ-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
+
+int foo(int c) {
+  int r;
+  __asm__("ag %%r15, %0"
+          :
+          : "rm"(c)
+          : "r15");
+  return r;
+}

diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index fa437a57520a..8e46265c7f71 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -739,6 +739,11 @@ class Triple {
     return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
   }
 
+  /// Tests whether the target is SystemZ.
+  bool isSystemZ() const {
+    return getArch() == Triple::systemz;
+  }
+
   /// Tests whether the target is x86 (32- or 64-bit).
   bool isX86() const {
     return getArch() == Triple::x86 || getArch() == Triple::x86_64;

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 330c21398265..f86725a5fde8 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -374,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
   }
 }
 
+// Add CFI for the new CFA offset.
+static void buildCFAOffs(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI,
+                         const DebugLoc &DL, int Offset,
+                         const SystemZInstrInfo *ZII) {
+  unsigned CFIIndex = MBB.getParent()->addFrameInst(
+    MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+    .addCFIIndex(CFIIndex);
+}
+
+// Add CFI for the new frame location.
+static void buildDefCFAReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           const DebugLoc &DL, unsigned Reg,
+                           const SystemZInstrInfo *ZII) {
+  MachineFunction &MF = *MBB.getParent();
+  MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+  unsigned CFIIndex = MF.addFrameInst(
+                        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+    .addCFIIndex(CFIIndex);
+}
+
 void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+  const SystemZTargetLowering &TLI = *STI.getTargetLowering();
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
-  auto *ZII =
-      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineModuleInfo &MMI = MF.getMMI();
@@ -462,13 +489,22 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
 
     // Allocate StackSize bytes.
     int64_t Delta = -int64_t(StackSize);
-    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
-
-    // Add CFI for the allocation.
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPOffsetFromCFA - Delta));
-    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+    const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+    bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
+           (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
+    if (!FreeProbe &&
+        MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
+      // Stack probing may involve looping, but splitting the prologue block
+      // is not possible at this point since it would invalidate the
+      // SaveBlocks / RestoreBlocks sets of PEI in the single block function
+      // case. Build a pseudo to be handled later by inlineStackProbe().
+      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
+        .addImm(StackSize);
+    }
+    else {
+      emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+      buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+    }
     SPOffsetFromCFA += Delta;
 
     if (StoreBackchain) {
@@ -486,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
       .addReg(SystemZ::R15D);
 
     // Add CFI for the new frame location.
-    unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
-    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+    buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);
 
     // Mark the FramePtr as live at the beginning of every block except
     // the entry block.  (We'll have marked R11 as live on entry when
@@ -583,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
   }
 }
 
+void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
+                                            MachineBasicBlock &PrologMBB) const {
+  auto *ZII =
+    static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+  const SystemZTargetLowering &TLI = *STI.getTargetLowering();
+
+  MachineInstr *StackAllocMI = nullptr;
+  for (MachineInstr &MI : PrologMBB)
+    if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
+      StackAllocMI = &MI;
+      break;
+    }
+  if (StackAllocMI == nullptr)
+    return;
+  uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
+  const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+  uint64_t NumFullBlocks = StackSize / ProbeSize;
+  uint64_t Residual = StackSize % ProbeSize;
+  int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+  MachineBasicBlock *MBB = &PrologMBB;
+  MachineBasicBlock::iterator MBBI = StackAllocMI;
+  const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+  // Allocate a block of Size bytes on the stack and probe it.
+  auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
+                              MachineBasicBlock::iterator InsPt, unsigned Size,
+                              bool EmitCFI) -> void {
+    emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
+    if (EmitCFI) {
+      SPOffsetFromCFA -= Size;
+      buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
+    }
+    // Probe by means of a volatile compare.
+    MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
+      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+    BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
+      .addReg(SystemZ::R0D, RegState::Undef)
+      .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
+      .addMemOperand(MMO);
+  };
+
+  if (NumFullBlocks < 3) {
+    // Emit unrolled probe statements.
+    for (unsigned int i = 0; i < NumFullBlocks; i++)
+      allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
+  } else {
+    // Emit a loop probing the pages.
+    uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
+    SPOffsetFromCFA -= LoopAlloc;
+
+    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+      .addReg(SystemZ::R15D);
+    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
+    emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+    buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
+                 ZII);
+
+    MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+    MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+    MBB->addSuccessor(LoopMBB);
+    LoopMBB->addSuccessor(LoopMBB);
+    LoopMBB->addSuccessor(DoneMBB);
+
+    MBB = LoopMBB;
+    allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
+    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
+      .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
+      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
+
+    MBB = DoneMBB;
+    MBBI = DoneMBB->begin();
+    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
+
+    recomputeLiveIns(*DoneMBB);
+    recomputeLiveIns(*LoopMBB);
+  }
+
+  if (Residual)
+    allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+
+  StackAllocMI->eraseFromParent();
+}
+
 bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
           MF.getFrameInfo().hasVarSizedObjects() ||
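
A pseudo-C model of the block probing scheme that inlineStackProbe() above
expands PROBED_STACKALLOC into (the function and variable names here are
illustrative, not LLVM code, and CFI emission is omitted): fewer than three
full blocks are probed unrolled, otherwise a loop walks the blocks, and a
residual partial block is probed last. When the register save (STMG) already
touches the new frame within one probe size, emitPrologue() skips the pseudo
entirely (the FreeProbe case).

  /* Hypothetical sketch; 'sp' models %r15. */
  static void probe_static_alloc(volatile char *sp, unsigned long stack_size,
                                 unsigned long probe_size) {
    unsigned long full_blocks = stack_size / probe_size;
    unsigned long residual = stack_size % probe_size;
    volatile char dummy;
    if (full_blocks < 3) {
      /* Few blocks: emit the probes unrolled, one per block. */
      for (unsigned long i = 0; i < full_blocks; i++) {
        sp -= probe_size;               /* decrement %r15 */
        dummy = sp[probe_size - 8];     /* CG: volatile load into the block */
      }
    } else {
      /* Many blocks: loop against a precomputed limit (%r1 in the real code). */
      volatile char *limit = sp - full_blocks * probe_size;
      while (sp > limit) {
        sp -= probe_size;
        dummy = sp[probe_size - 8];
      }
    }
    if (residual) {                     /* probe the leftover partial block */
      sp -= residual;
      dummy = sp[residual - 8];
    }
    (void)dummy;
  }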

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index b23f88f7de1f..8752acc7e5ae 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -43,6 +43,8 @@ class SystemZFrameLowering : public TargetFrameLowering {
                                            RegScavenger *RS) const override;
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void inlineStackProbe(MachineFunction &MF,
+                        MachineBasicBlock &PrologMBB) const override;
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   int getFrameIndexReference(const MachineFunction &MF, int FI,

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 59896d628816..a753f2a14a35 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -826,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
 }
 
+/// Returns true if stack probing through inline assembly is requested.
+bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+  // If the function specifically requests inline stack probes, emit them.
+  if (MF.getFunction().hasFnAttribute("probe-stack"))
+    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+           "inline-asm";
+  return false;
+}
+
 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   // We can use CGFI or CLGFI.
   return isInt<32>(Imm) || isUInt<32>(Imm);
@@ -3428,10 +3437,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
                               DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
 
   // Get the new stack pointer value.
-  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
-
-  // Copy the new stack pointer back.
-  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+  SDValue NewSP;
+  if (hasInlineStackProbe(MF)) {
+    NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
+                DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
+    Chain = NewSP.getValue(1);
+  }
+  else {
+    NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+    // Copy the new stack pointer back.
+    Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+  }
 
   // The allocated data lives above the 160 bytes allocated for the standard
   // frame, plus any outgoing stack arguments.  We don't know how much that
@@ -5400,6 +5416,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(BR_CCMASK);
     OPCODE(SELECT_CCMASK);
     OPCODE(ADJDYNALLOC);
+    OPCODE(PROBED_ALLOCA);
     OPCODE(POPCNT);
     OPCODE(SMUL_LOHI);
     OPCODE(UMUL_LOHI);
@@ -6825,38 +6842,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
   return 1;
 }
 
+unsigned
+SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+  unsigned StackAlign = TFI->getStackAlignment();
+  assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
+         "Unexpected stack alignment");
+  // The default stack probe size is 4096 if the function has no
+  // stack-probe-size attribute.
+  unsigned StackProbeSize = 4096;
+  const Function &Fn = MF.getFunction();
+  if (Fn.hasFnAttribute("stack-probe-size"))
+    Fn.getFnAttribute("stack-probe-size")
+        .getValueAsString()
+        .getAsInteger(0, StackProbeSize);
+  // Round down to the stack alignment.
+  StackProbeSize &= ~(StackAlign - 1);
+  return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
 //===----------------------------------------------------------------------===//
 // Custom insertion
 //===----------------------------------------------------------------------===//
 
-// Create a new basic block after MBB.
-static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
-  MachineFunction &MF = *MBB->getParent();
-  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
-  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
-  return NewMBB;
-}
-
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
-                                          MachineBasicBlock *MBB) {
-  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
-  NewMBB->splice(NewMBB->begin(), MBB,
-                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
-  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
-  return NewMBB;
-}
-
-// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
-                                           MachineBasicBlock *MBB) {
-  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
-  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
-  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
-  return NewMBB;
-}
-
 // Force base value Base into a register before MI.  Return the register.
 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                          const SystemZInstrInfo *TII) {
@@ -7027,8 +7035,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
   bool CCKilled =
       (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *JoinMBB  = splitBlockAfter(LastMI, MBB);
-  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockAfter(LastMI, MBB);
+  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
 
   // Unless CC was killed in the last Select instruction, mark it as
   // live-in to both FalseMBB and JoinMBB.
@@ -7121,8 +7129,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
     CCMask ^= CCValid;
 
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
 
   // Unless CC was killed in the CondStore instruction, mark it as
   // live-in to both FalseMBB and JoinMBB.
@@ -7205,8 +7213,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
 
   // Insert a basic block for the main loop.
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
+  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(StartMBB);
 
   //  StartMBB:
   //   ...
@@ -7323,10 +7331,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
 
   // Insert 3 basic blocks for the loop.
   MachineBasicBlock *StartMBB  = MBB;
-  MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
-  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
-  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+  MachineBasicBlock *DoneMBB   = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB   = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
+  MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
 
   //  StartMBB:
   //   ...
@@ -7434,9 +7442,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
 
   // Insert 2 basic blocks for the loop.
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
-  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);
+  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *SetMBB   = SystemZ::emitBlockAfter(LoopMBB);
 
   //  StartMBB:
   //   ...
@@ -7596,7 +7604,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
   // When generating more than one CLC, all but the last will need to
   // branch to the end when a difference is found.
   MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
-                               splitBlockAfter(MI, MBB) : nullptr);
+                               SystemZ::splitBlockAfter(MI, MBB) : nullptr);
 
   // Check for the loop form, in which operand 5 is the trip count.
   if (MI.getNumExplicitOperands() > 5) {
@@ -7620,9 +7628,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
     Register NextCountReg = MRI.createVirtualRegister(RC);
 
     MachineBasicBlock *StartMBB = MBB;
-    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
-    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+    MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+    MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+    MachineBasicBlock *NextMBB =
+        (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
 
     //  StartMBB:
     //   # fall through to LoopMMB
@@ -7738,7 +7747,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
     // If there's another CLC to go, branch to the end if a difference
     // was found.
     if (EndMBB && Length > 0) {
-      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
       BuildMI(MBB, DL, TII->get(SystemZ::BRC))
         .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
         .addMBB(EndMBB);
@@ -7778,8 +7787,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
   uint64_t End2Reg  = MRI.createVirtualRegister(RC);
 
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
 
   //  StartMBB:
   //   # fall through to LoopMMB
@@ -7890,6 +7899,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
   return MBB;
 }
 
+MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
+    MachineInstr &MI, MachineBasicBlock *MBB) const {
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  DebugLoc DL = MI.getDebugLoc();
+  const unsigned ProbeSize = getStackProbeSize(MF);
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SizeReg = MI.getOperand(2).getReg();
+
+  MachineBasicBlock *StartMBB = MBB;
+  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockAfter(MI, MBB);
+  MachineBasicBlock *LoopTestMBB  = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
+  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
+  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
+
+  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
+    MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+
+  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+
+  //  LoopTestMBB
+  //  BRC TailTestMBB
+  //  # fallthrough to LoopBodyMBB
+  StartMBB->addSuccessor(LoopTestMBB);
+  MBB = LoopTestMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
+    .addReg(SizeReg)
+    .addMBB(StartMBB)
+    .addReg(IncReg)
+    .addMBB(LoopBodyMBB);
+  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
+    .addReg(PHIReg)
+    .addImm(ProbeSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
+    .addMBB(TailTestMBB);
+  MBB->addSuccessor(LoopBodyMBB);
+  MBB->addSuccessor(TailTestMBB);
+
+  //  LoopBodyMBB: Allocate and probe by means of a volatile compare.
+  //  J LoopTestMBB
+  MBB = LoopBodyMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
+    .addReg(PHIReg)
+    .addImm(ProbeSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
+    .addReg(SystemZ::R15D)
+    .addImm(ProbeSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+    .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
+    .setMemRefs(VolLdMMO);
+  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
+  MBB->addSuccessor(LoopTestMBB);
+
+  //  TailTestMBB
+  //  BRC DoneMBB
+  //  # fallthrough to TailMBB
+  MBB = TailTestMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+    .addReg(PHIReg)
+    .addImm(0);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+    .addMBB(DoneMBB);
+  MBB->addSuccessor(TailMBB);
+  MBB->addSuccessor(DoneMBB);
+
+  //  TailMBB
+  //  # fallthrough to DoneMBB
+  MBB = TailMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
+    .addReg(SystemZ::R15D)
+    .addReg(PHIReg);
+  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+    .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
+    .setMemRefs(VolLdMMO);
+  MBB->addSuccessor(DoneMBB);
+
+  //  DoneMBB
+  MBB = DoneMBB;
+  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
+    .addReg(SystemZ::R15D);
+
+  MI.eraseFromParent();
+  return DoneMBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
@@ -8150,6 +8250,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
   case SystemZ::LTXBRCompare_VecPseudo:
     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
 
+  case SystemZ::PROBED_ALLOCA:
+    return emitProbedAlloca(MI, MBB);
+
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, MBB);
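
The rounding performed by getStackProbeSize() above can be summarized by a
small helper (hypothetical name; it assumes a power-of-two stack alignment,
as the assert requires, and the values mirror the new tests): the
"stack-probe-size" attribute is rounded down to the stack alignment, with the
alignment itself as the minimum.

  unsigned round_probe_size(unsigned requested, unsigned stack_align) {
    unsigned probe = requested & ~(stack_align - 1u);  /* round down */
    return probe ? probe : stack_align;                /* never below align */
  }
  /* round_probe_size(1250, 8) == 1248  (see fun1 in the new .ll tests)
   * round_probe_size(4, 8)    == 8     (fun2: minimum is the alignment) */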

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index dd6098d7bb94..e60deaedbdfb 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -83,6 +83,10 @@ enum NodeType : unsigned {
   // base of the dynamically-allocatable area.
   ADJDYNALLOC,
 
+  // For allocating stack space when using stack clash protector.
+  // Allocation is performed by block, and each block is probed.
+  PROBED_ALLOCA,
+
   // Count number of bits set in operand 0 per byte.
   POPCNT,
 
@@ -428,6 +432,7 @@ class SystemZTargetLowering : public TargetLowering {
                                   EVT VT) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
+  bool hasInlineStackProbe(MachineFunction &MF) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -556,6 +561,8 @@ class SystemZTargetLowering : public TargetLowering {
     return true;
   }
 
+  unsigned getStackProbeSize(MachineFunction &MF) const;
+
 private:
   const SystemZSubtarget &Subtarget;
 
@@ -691,6 +698,8 @@ class SystemZTargetLowering : public TargetLowering {
   MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const;
+  MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+                                      MachineBasicBlock *MBB) const;
 
   MachineMemOperand::Flags
   getTargetMMOFlags(const Instruction &I) const override;
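
For dynamic allocas, the PROBED_ALLOCA node declared above is expanded by
emitProbedAlloca() into a probing loop plus a tail probe. A pseudo-C model
(hypothetical names; 'sp' stands for %r15, 'needed' for the requested size):

  static volatile char *probed_dynamic_alloc(volatile char *sp,
                                             unsigned long needed,
                                             unsigned long probe_size) {
    volatile char dummy;
    /* Loop: allocate and probe one full block at a time. */
    while (needed >= probe_size) {
      needed -= probe_size;
      sp -= probe_size;
      dummy = sp[probe_size - 8];   /* CG: volatile load in the new block */
    }
    /* Tail: allocate and probe any remaining partial block. */
    if (needed != 0) {
      sp -= needed;
      dummy = sp[needed - 8];       /* CG ... -8(needed,%r15) in the MIR */
    }
    (void)dummy;
    return sp;                      /* becomes the node's result */
  }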

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index be791bd7acf1..223cfcba2fac 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1872,6 +1872,30 @@ unsigned SystemZ::reverseCCMask(unsigned CCMask) {
           (CCMask & SystemZ::CCMASK_CMP_UO));
 }
 
+MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) {
+  MachineFunction &MF = *MBB->getParent();
+  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
+  return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI,
+                                            MachineBasicBlock *MBB) {
+  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+  NewMBB->splice(NewMBB->begin(), MBB,
+                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+  return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI,
+                                             MachineBasicBlock *MBB) {
+  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+  return NewMBB;
+}
+
 unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
   if (!STI.hasLoadAndTrap())
     return 0;

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 2247f9f3fdbf..72dafc3c93c2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -159,6 +159,16 @@ int getTargetMemOpcode(uint16_t Opcode);
 // Return a version of comparison CC mask CCMask in which the LT and GT
 // actions are swapped.
 unsigned reverseCCMask(unsigned CCMask);
+
+// Create a new basic block after MBB.
+MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB);
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
+                                   MachineBasicBlock *MBB);
+// Split MBB before MI and return the new block (the one that contains MI).
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+                                    MachineBasicBlock *MBB);
 }
 
 class SystemZInstrInfo : public SystemZGenInstrInfo {

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 5e153f0d1279..d5d56ecf6e47 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
 def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
                          [(set GR64:$dst, dynalloc12only:$src)]>;
 
+let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+    usesCustomInserter = 1 in
+  def PROBED_ALLOCA : Pseudo<(outs GR64:$dst),
+                             (ins GR64:$oldSP, GR64:$space),
+           [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>;
+
+let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+    hasSideEffects = 1 in
+  def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
 
 //===----------------------------------------------------------------------===//
 // Branch instructions

diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 852c6282be6e..81af5fd854db 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -40,6 +40,10 @@ def SDT_ZWrapOffset         : SDTypeProfile<1, 2,
                                              SDTCisSameAs<0, 2>,
                                              SDTCisPtrTy<0>]>;
 def SDT_ZAdjDynAlloc        : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZProbedAlloca       : SDTypeProfile<1, 2,
+                                            [SDTCisSameAs<0, 1>,
+                                             SDTCisSameAs<0, 2>,
+                                             SDTCisPtrTy<0>]>;
 def SDT_ZGR128Binary        : SDTypeProfile<1, 2,
                                             [SDTCisVT<0, untyped>,
                                              SDTCisInt<1>,
@@ -269,6 +273,8 @@ def z_select_ccmask_1   : SDNode<"SystemZISD::SELECT_CCMASK",
                                  SDT_ZSelectCCMask>;
 def z_ipm_1             : SDNode<"SystemZISD::IPM", SDT_ZIPM>;
 def z_adjdynalloc       : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_probed_alloca     : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca,
+                                 [SDNPHasChain]>;
 def z_popcnt            : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
 def z_smul_lohi         : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
 def z_umul_lohi         : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;

diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
new file mode 100644
index 000000000000..748f441a9219
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+define i32 @fun0(i32 %n) #0 {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT:    .cfi_offset %r11, -72
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    lgr %r11, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT:    risbgn %r1, %r2, 30, 189, 2
+; CHECK-NEXT:    la %r0, 7(%r1)
+; CHECK-NEXT:    risbgn %r1, %r0, 29, 188, 0
+; CHECK-NEXT:    clgfi %r1, 4096
+; CHECK-NEXT:    jl .LBB0_2
+; CHECK-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slgfi %r1, 4096
+; CHECK-NEXT:    slgfi %r15, 4096
+; CHECK-NEXT:    cg %r15, 4088(%r15)
+; CHECK-NEXT:    clgfi %r1, 4096
+; CHECK-NEXT:    jhe .LBB0_1
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    cgije %r1, 0, .LBB0_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    slgr %r15, %r1
+; CHECK-NEXT:    cg %r15, -8(%r1,%r15)
+; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:    la %r1, 160(%r15)
+; CHECK-NEXT:    lhi %r0, 1
+; CHECK-NEXT:    sty %r0, 4792(%r1)
+; CHECK-NEXT:    l %r2, 0(%r1)
+; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT:    br %r14
+
+  %a = alloca i32, i32 %n
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+; Probe size should be modulo stack alignment.
+define i32 @fun1(i32 %n) #0 "stack-probe-size"="1250" {
+; CHECK-LABEL: fun1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT:    .cfi_offset %r11, -72
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    lgr %r11, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT:    risbgn %r1, %r2, 30, 189, 2
+; CHECK-NEXT:    la %r0, 7(%r1)
+; CHECK-NEXT:    risbgn %r1, %r0, 29, 188, 0
+; CHECK-NEXT:    clgfi %r1, 1248
+; CHECK-NEXT:    jl .LBB1_2
+; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slgfi %r1, 1248
+; CHECK-NEXT:    slgfi %r15, 1248
+; CHECK-NEXT:    cg %r15, 1240(%r15)
+; CHECK-NEXT:    clgfi %r1, 1248
+; CHECK-NEXT:    jhe .LBB1_1
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    cgije %r1, 0, .LBB1_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    slgr %r15, %r1
+; CHECK-NEXT:    cg %r15, -8(%r1,%r15)
+; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:    la %r1, 160(%r15)
+; CHECK-NEXT:    lhi %r0, 1
+; CHECK-NEXT:    sty %r0, 4792(%r1)
+; CHECK-NEXT:    l %r2, 0(%r1)
+; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT:    br %r14
+  %a = alloca i32, i32 %n
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+; The minimum probe size is the stack alignment.
+define i32 @fun2(i32 %n) #0 "stack-probe-size"="4" {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT:    .cfi_offset %r11, -72
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    lgr %r1, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r1
+; CHECK-NEXT:    aghi %r1, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    aghi %r15, -8
+; CHECK-NEXT:    cg %r0, 0(%r15)
+; CHECK-NEXT:    clgrjh %r15, %r1, .LBB2_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    .cfi_def_cfa_register %r15
+; CHECK-NEXT:    lgr %r11, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r11
+; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT:    risbgn %r1, %r2, 30, 189, 2
+; CHECK-NEXT:    la %r0, 7(%r1)
+; CHECK-NEXT:    risbgn %r1, %r0, 29, 188, 0
+; CHECK-NEXT:    clgijl %r1, 8, .LBB2_4
+; CHECK-NEXT:  .LBB2_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slgfi %r1, 8
+; CHECK-NEXT:    slgfi %r15, 8
+; CHECK-NEXT:    cg %r15, 0(%r15)
+; CHECK-NEXT:    clgijhe %r1, 8, .LBB2_3
+; CHECK-NEXT:  .LBB2_4:
+; CHECK-NEXT:    cgije %r1, 0, .LBB2_6
+; CHECK-NEXT:  # %bb.5:
+; CHECK-NEXT:    slgr %r15, %r1
+; CHECK-NEXT:    cg %r15, -8(%r1,%r15)
+; CHECK-NEXT:  .LBB2_6:
+; CHECK-NEXT:    la %r1, 160(%r15)
+; CHECK-NEXT:    lhi %r0, 1
+; CHECK-NEXT:    sty %r0, 4792(%r1)
+; CHECK-NEXT:    l %r2, 0(%r1)
+; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT:    br %r14
+  %a = alloca i32, i32 %n
+  %b = getelementptr inbounds i32, i32* %a, i64 1198
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+attributes #0 =  {"probe-stack"="inline-asm"}

diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
new file mode 100644
index 000000000000..8166af3673cd
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
@@ -0,0 +1,242 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
+;
+; Test stack clash protection probing for static allocas.
+
+; Small: one probe.
+define i32 @fun0() #0 {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    aghi %r15, -560
+; CHECK-NEXT:    .cfi_def_cfa_offset 720
+; CHECK-NEXT:    cg %r0, 552(%r15)
+; CHECK-NEXT:    mvhi 552(%r15), 1
+; CHECK-NEXT:    l %r2, 160(%r15)
+; CHECK-NEXT:    aghi %r15, 560
+; CHECK-NEXT:    br %r14
+
+  %a = alloca i32, i64 100
+  %b = getelementptr inbounds i32, i32* %a, i64 98
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+; Medium: two probes.
+define i32 @fun1() #0 {
+; CHECK-LABEL: fun1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    aghi %r15, -4096
+; CHECK-NEXT:    .cfi_def_cfa_offset 4256
+; CHECK-NEXT:    cg %r0, 4088(%r15)
+; CHECK-NEXT:    aghi %r15, -4080
+; CHECK-NEXT:    .cfi_def_cfa_offset 8336
+; CHECK-NEXT:    cg %r0, 4072(%r15)
+; CHECK-NEXT:    mvhi 976(%r15), 1
+; CHECK-NEXT:    l %r2, 176(%r15)
+; CHECK-NEXT:    aghi %r15, 8176
+; CHECK-NEXT:    br %r14
+
+  %a = alloca i32, i64 2000
+  %b = getelementptr inbounds i32, i32* %a, i64 200
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+; Large: Use a loop to allocate and probe in steps.
+define i32 @fun2() #0 {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lgr %r1, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r1
+; CHECK-NEXT:    agfi %r1, -69632
+; CHECK-NEXT:    .cfi_def_cfa_offset 69792
+; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    aghi %r15, -4096
+; CHECK-NEXT:    cg %r0, 4088(%r15)
+; CHECK-NEXT:    clgrjh %r15, %r1, .LBB2_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    .cfi_def_cfa_register %r15
+; CHECK-NEXT:    aghi %r15, -2544
+; CHECK-NEXT:    .cfi_def_cfa_offset 72336
+; CHECK-NEXT:    cg %r0, 2536(%r15)
+; CHECK-NEXT:    lhi %r0, 1
+; CHECK-NEXT:    mvhi 568(%r15), 1
+; CHECK-NEXT:    sty %r0, 28968(%r15)
+; CHECK-NEXT:    l %r2, 176(%r15)
+; CHECK-NEXT:    agfi %r15, 72176
+; CHECK-NEXT:    br %r14
+
+  %a = alloca i32, i64 18000
+  %b0 = getelementptr inbounds i32, i32* %a, i64 98
+  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
+  store volatile i32 1, i32* %b0
+  store volatile i32 1, i32* %b1
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+; Ends evenly on the step so no remainder needed.
+define void @fun3() #0 {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lgr %r1, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r1
+; CHECK-NEXT:    aghi %r1, -28672
+; CHECK-NEXT:    .cfi_def_cfa_offset 28832
+; CHECK-NEXT:  .LBB3_1: # %entry
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    aghi %r15, -4096
+; CHECK-NEXT:    cg %r0, 4088(%r15)
+; CHECK-NEXT:    clgrjh %r15, %r1, .LBB3_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    .cfi_def_cfa_register %r15
+; CHECK-NEXT:    mvhi 180(%r15), 0
+; CHECK-NEXT:    l %r0, 180(%r15)
+; CHECK-NEXT:    aghi %r15, 28672
+; CHECK-NEXT:    br %r14
+entry:
+  %stack = alloca [7122 x i32], align 4
+  %i = alloca i32, align 4
+  %0 = bitcast [7122 x i32]* %stack to i8*
+  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+  store volatile i32 0, i32* %i, align 4
+  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+  ret void
+}
+
+; Loop with bigger step.
+define void @fun4() #0 "stack-probe-size"="8192" {
+; CHECK-LABEL: fun4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lgr %r1, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r1
+; CHECK-NEXT:    aghi %r1, -24576
+; CHECK-NEXT:    .cfi_def_cfa_offset 24736
+; CHECK-NEXT:  .LBB4_1: # %entry
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    aghi %r15, -8192
+; CHECK-NEXT:    cg %r0, 8184(%r15)
+; CHECK-NEXT:    clgrjh %r15, %r1, .LBB4_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    .cfi_def_cfa_register %r15
+; CHECK-NEXT:    aghi %r15, -7608
+; CHECK-NEXT:    .cfi_def_cfa_offset 32344
+; CHECK-NEXT:    cg %r0, 7600(%r15)
+; CHECK-NEXT:    mvhi 180(%r15), 0
+; CHECK-NEXT:    l %r0, 180(%r15)
+; CHECK-NEXT:    aghi %r15, 32184
+; CHECK-NEXT:    br %r14
+entry:
+  %stack = alloca [8000 x i32], align 4
+  %i = alloca i32, align 4
+  %0 = bitcast [8000 x i32]* %stack to i8*
+  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+  store volatile i32 0, i32* %i, align 4
+  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+  ret void
+}
+
+; Probe size should be modulo stack alignment.
+define void @fun5() #0 "stack-probe-size"="4100" {
+; CHECK-LABEL: fun5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    aghi %r15, -4096
+; CHECK-NEXT:    .cfi_def_cfa_offset 4256
+; CHECK-NEXT:    cg %r0, 4088(%r15)
+; CHECK-NEXT:    aghi %r15, -88
+; CHECK-NEXT:    .cfi_def_cfa_offset 4344
+; CHECK-NEXT:    cg %r0, 80(%r15)
+; CHECK-NEXT:    mvhi 180(%r15), 0
+; CHECK-NEXT:    l %r0, 180(%r15)
+; CHECK-NEXT:    aghi %r15, 4184
+; CHECK-NEXT:    br %r14
+entry:
+  %stack = alloca [1000 x i32], align 4
+  %i = alloca i32, align 4
+  %0 = bitcast [1000 x i32]* %stack to i8*
+  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+  store volatile i32 0, i32* %i, align 4
+  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+  ret void
+}
+
+; The minimum probe size is the stack alignment.
+define void @fun6() #0 "stack-probe-size"="5" {
+; CHECK-LABEL: fun6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lgr %r1, %r15
+; CHECK-NEXT:    .cfi_def_cfa_register %r1
+; CHECK-NEXT:    aghi %r1, -4184
+; CHECK-NEXT:    .cfi_def_cfa_offset 4344
+; CHECK-NEXT:  .LBB6_1: # %entry
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    aghi %r15, -8
+; CHECK-NEXT:    cg %r0, 0(%r15)
+; CHECK-NEXT:    clgrjh %r15, %r1, .LBB6_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    .cfi_def_cfa_register %r15
+; CHECK-NEXT:    mvhi 180(%r15), 0
+; CHECK-NEXT:    l %r0, 180(%r15)
+; CHECK-NEXT:    aghi %r15, 4184
+; CHECK-NEXT:    br %r14
+entry:
+  %stack = alloca [1000 x i32], align 4
+  %i = alloca i32, align 4
+  %0 = bitcast [1000 x i32]* %stack to i8*
+  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+  store volatile i32 0, i32* %i, align 4
+  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+  ret void
+}
+
+; Small with a natural probe (STMG) - needs no extra probe.
+define i32 @fun7() #0 {
+; CHECK-LABEL: fun7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -3976
+; CHECK-NEXT:    .cfi_def_cfa_offset 4136
+; CHECK-NEXT:    brasl %r14, foo@PLT
+; CHECK-NEXT:    st %r2, 568(%r15)
+; CHECK-NEXT:    l %r2, 176(%r15)
+; CHECK-NEXT:    lmg %r14, %r15, 4088(%r15)
+; CHECK-NEXT:    br %r14
+  %v = call i32 @foo()
+  %a = alloca i32, i64 950
+  %b = getelementptr inbounds i32, i32* %a, i64 98
+  store volatile i32 %v, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+; Medium with an STMG - still needs probing.
+define i32 @fun8() #0 {
+; CHECK-LABEL: fun8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -3984
+; CHECK-NEXT:    .cfi_def_cfa_offset 4144
+; CHECK-NEXT:    cg %r0, 3976(%r15)
+; CHECK-NEXT:    brasl %r14, foo@PLT
+; CHECK-NEXT:    st %r2, 976(%r15)
+; CHECK-NEXT:    l %r2, 176(%r15)
+; CHECK-NEXT:    lmg %r14, %r15, 4096(%r15)
+; CHECK-NEXT:    br %r14
+
+  %v = call i32 @foo()
+  %a = alloca i32, i64 952
+  %b = getelementptr inbounds i32, i32* %a, i64 200
+  store volatile i32 %v, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+declare i32 @foo()
+attributes #0 = {  "probe-stack"="inline-asm"  }
+
