[llvm] 708a478 - [RISCV] Add stack clash protection (#117612)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 08:48:30 PST 2024
Author: Raphael Moreira Zinsly
Date: 2024-12-10T16:48:26Z
New Revision: 708a478d6739aea20a8834cea45490f05b07ca10
URL: https://github.com/llvm/llvm-project/commit/708a478d6739aea20a8834cea45490f05b07ca10
DIFF: https://github.com/llvm/llvm-project/commit/708a478d6739aea20a8834cea45490f05b07ca10.diff
LOG: [RISCV] Add stack clash protection (#117612)
Enable `-fstack-clash-protection` for RISC-V and stack probing for function
prologues.
We probe the stack by creating a loop that allocates and probes the stack
in ProbeSize chunks.
We emit an unrolled probe loop for small allocations and emit a variable
length probe loop for bigger ones.
Added:
llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
Modified:
clang/lib/Driver/ToolChains/Clang.cpp
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
llvm/lib/Target/RISCV/RISCVFrameLowering.h
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Removed:
################################################################################
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 7ab85ad9d100c2..d3206c3e8e25ed 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3777,7 +3777,8 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
- !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
+ !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64() &&
+ !EffectiveTriple.isRISCV())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index deb0b627225c64..655de0b4e7eb5d 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -580,25 +580,124 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
Comment.str());
}
+// Allocate stack space and probe it if necessary.
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineFunction &MF, StackOffset Offset,
- uint64_t RealStackSize,
- bool EmitCFI) const {
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI,
+ bool NeedProbe,
+ uint64_t ProbeSize) const {
DebugLoc DL;
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
+ // Simply allocate the stack if it's not big enough to require a probe.
+ if (!NeedProbe || Offset <= ProbeSize) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
+ MachineInstr::FrameSetup, getStackAlign());
+
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset RealStackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ return;
+ }
+
+ // Unroll the probe loop depending on the number of iterations.
+ if (Offset < ProbeSize * 5) {
+ uint64_t CurrentOffset = 0;
+ bool IsRV64 = STI.is64Bit();
+ while (CurrentOffset + ProbeSize <= Offset) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
+ getStackAlign());
+ // s[d|w] zero, 0(sp)
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ CurrentOffset += ProbeSize;
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ uint64_t Residual = Offset - CurrentOffset;
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
+ getStackAlign());
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ uint64_t RoundedSize = alignDown(Offset, ProbeSize);
+ uint64_t Residual = Offset - RoundedSize;
+
+ Register TargetReg = RISCV::X6;
+ // SUB TargetReg, SP, RoundedSize
+ RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
+ StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
getStackAlign());
if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset RealStackSize"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
+ // Set the CFA register to TargetReg.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ // It will be expanded to a probe loop in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
+ .addReg(SPReg)
+ .addReg(TargetReg);
+
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ if (Residual)
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
+ MachineInstr::FrameSetup, getStackAlign());
+
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -716,11 +815,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
getPushOrLibCallsSavedInfo(MF, CSI));
}
- if (StackSize != 0) {
- // Allocate space on the stack if necessary.
- allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-StackSize),
- RealStackSize, /*EmitCFI=*/true);
- }
+ // Allocate space on the stack if necessary.
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ bool NeedProbe = TLI->hasInlineStackProbe(MF);
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
+ if (StackSize != 0)
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
+ NeedProbe, ProbeSize);
// The frame pointer is callee-saved, and code has been generated for us to
// save it to the stack. We need to skip over the storing of callee-saved
@@ -761,8 +863,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
- allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-SecondSPAdjustAmount),
- getStackSizeWithRVVPadding(MF), !hasFP(MF));
+ allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
+ getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
+ ProbeSize);
}
if (RVVStackSize) {
@@ -1910,3 +2013,69 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
+
+// Synthesize the probe loop.
+static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
+ bool IsRV64 = Subtarget.is64Bit();
+ Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
+ Register TargetReg = RISCV::X6;
+ Register ScratchReg = RISCV::X7;
+
+ // ScratchReg = ProbeSize
+ TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
+
+ // LoopTest:
+ // SUB SP, SP, ProbeSize
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg)
+ .setMIFlags(Flags);
+
+ // s[d|w] zero, 0(sp)
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
+ TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ // BNE SP, TargetReg, LoopTest
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
+ .addReg(SPReg)
+ .addReg(TargetReg)
+ .addMBB(LoopTestMBB)
+ .setMIFlags(Flags);
+
+ ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
+
+ LoopTestMBB->addSuccessor(ExitMBB);
+ LoopTestMBB->addSuccessor(LoopTestMBB);
+ MBB.addSuccessor(LoopTestMBB);
+}
+
+void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
+ });
+ if (Where != MBB.end()) {
+ DebugLoc DL = MBB.findDebugLoc(Where);
+ emitStackProbeInline(MF, MBB, Where, DL);
+ Where->eraseFromParent();
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index ac0c805c744d63..190c063d9d3b5d 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -79,8 +79,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
}
void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineFunction &MF, StackOffset Offset,
- uint64_t RealStackSize, bool EmitCFI) const;
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI, bool NeedProbe,
+ uint64_t ProbeSize) const;
protected:
const RISCVSubtarget &STI;
@@ -103,6 +104,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction &MF) const;
+ // Replace a StackProbe stub (if any) with the actual probe code inline
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 46dedcc3e09cf2..c6838573637202 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22350,3 +22350,25 @@ namespace llvm::RISCVVIntrinsicsTable {
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCVVIntrinsicsTable
+
+bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
+
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+
+ return false;
+}
+
+unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
+ Align StackAlign) const {
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ const Function &Fn = MF.getFunction();
+ unsigned StackProbeSize =
+ Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
+ // Round down to the stack alignment.
+ StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
+ return StackProbeSize ? StackProbeSize : StackAlign.value();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index bb0d9a71abf7e6..778e38a1a834ee 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -919,6 +919,11 @@ class RISCVTargetLowering : public TargetLowering {
MachineBasicBlock::instr_iterator &MBBI,
const TargetInstrInfo *TII) const override;
+ /// True if stack clash protection is enabled for this functions.
+ bool hasInlineStackProbe(const MachineFunction &MF) const override;
+
+ unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index cad9f5e3790be1..14b571cebe1fec 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
(ADDI GPR:$rs1, simm12:$imm12)>;
+/// Stack probing
+
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+// Probed stack allocation of a constant size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
+ (ins GPR:$scratch),
+ []>,
+ Sched<[]>;
+}
+
/// HI and ADD_LO address nodes.
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
index d0c363042f5118..a0d79317638184 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVMachineFunctionInfo.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
@@ -26,6 +27,35 @@ MachineFunctionInfo *RISCVMachineFunctionInfo::clone(
return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this);
}
+RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(const Function &F,
+ const RISCVSubtarget *STI) {
+
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute. This is a safe default because it is the
+ // smallest possible guard page size.
+ uint64_t ProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
+ else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("stack-probe-size")))
+ ProbeSize = PS->getZExtValue();
+ assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
+
+ // Round down to the stack alignment.
+ uint64_t StackAlign =
+ STI->getFrameLowering()->getTransientStackAlign().value();
+ ProbeSize = std::max(StackAlign, alignDown(ProbeSize, StackAlign));
+ StringRef ProbeKind;
+ if (F.hasFnAttribute("probe-stack"))
+ ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
+ else if (const auto *PS = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("probe-stack")))
+ ProbeKind = PS->getString();
+ if (ProbeKind.size()) {
+ StackProbeSize = ProbeSize;
+ }
+}
+
void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this);
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 779c652b4d8fc4..8909f2f3bd3170 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -76,8 +76,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
unsigned RVPushRegs = 0;
int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST;
+ int64_t StackProbeSize = 0;
+
public:
- RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
+ RISCVMachineFunctionInfo(const Function &F, const RISCVSubtarget *STI);
MachineFunctionInfo *
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index c5847d8bfacb42..dcd3598f658f6a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -271,8 +271,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
- return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(Allocator,
- F, STI);
+ return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(
+ Allocator, F, static_cast<const RISCVSubtarget *>(STI));
}
TargetTransformInfo
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
new file mode 100644
index 00000000000000..3b2d7f1f9a8ea4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; Tests copied from PowerPC.
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; RV64I-LABEL: f0:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -64
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 0(sp)
+; RV64I-NEXT: lbu a0, 0(sp)
+; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -64
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 0(sp)
+; RV32I-NEXT: lbu a0, 0(sp)
+; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 nounwind {
+; RV64I-LABEL: f1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 4096
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f2() #0 nounwind {
+; RV64I-LABEL: f2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB2_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB2_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB2_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB2_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
+; RV64I-LABEL: f3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+; Same as f2, but without protection.
+define i8 @f4() nounwind {
+; RV64I-LABEL: f4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
+; RV64I-LABEL: f5:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 256
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB5_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB5_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 256
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f5:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 256
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB5_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB5_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1048576
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f6() #0 nounwind {
+; RV64I-LABEL: f6:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 262144
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB6_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB6_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 262144
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f6:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 262144
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB6_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB6_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1073741824
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
+; RV64I-LABEL: f7:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 244128
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB7_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB7_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: lui a0, 13
+; RV64I-NEXT: addiw a0, a0, -1520
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 9(sp)
+; RV64I-NEXT: lbu a0, 9(sp)
+; RV64I-NEXT: lui a1, 244141
+; RV64I-NEXT: addiw a1, a1, -1520
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f7:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 244128
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB7_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB7_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: lui a0, 13
+; RV32I-NEXT: addi a0, a0, -1520
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 9(sp)
+; RV32I-NEXT: lbu a0, 9(sp)
+; RV32I-NEXT: lui a1, 244141
+; RV32I-NEXT: addi a1, a1, -1520
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1000000007
+ %b = getelementptr inbounds i8, ptr %a, i64 101
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
new file mode 100644
index 00000000000000..18af080e86747b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -0,0 +1,541 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; Tests copied from PowerPC.
+
+; Free probe
+define i8 @f0() #0 {
+; RV64I-LABEL: f0:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -64
+; RV64I-NEXT: .cfi_def_cfa_offset 64
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 0(sp)
+; RV64I-NEXT: lbu a0, 0(sp)
+; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -64
+; RV32I-NEXT: .cfi_def_cfa_offset 64
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 0(sp)
+; RV32I-NEXT: lbu a0, 0(sp)
+; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 {
+; RV64I-LABEL: f1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 4096
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 4112
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 4096
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 4112
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 4096
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f2() #0 {
+; RV64I-LABEL: f2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 65536
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB2_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB2_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 65552
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 65536
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB2_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB2_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 65552
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" {
+; RV64I-LABEL: f3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 32768
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 65536
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 65552
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 32768
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 65536
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 65552
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+; Same as f2, but without protection.
+define i8 @f4() {
+; RV64I-LABEL: f4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 65552
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: .cfi_def_cfa_offset 65552
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" {
+; RV64I-LABEL: f5:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 256
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 1048576
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB5_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB5_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 1048592
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 256
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f5:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 256
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 1048576
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB5_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB5_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 1048592
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1048576
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f6() #0 {
+; RV64I-LABEL: f6:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 262144
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 1073741824
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB6_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB6_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 1073741840
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 262144
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f6:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 262144
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 1073741824
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB6_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB6_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 1073741840
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1073741824
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" {
+; RV64I-LABEL: f7:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 244128
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 999948288
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB7_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB7_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: lui a0, 13
+; RV64I-NEXT: addiw a0, a0, -1520
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 1000000016
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 9(sp)
+; RV64I-NEXT: lbu a0, 9(sp)
+; RV64I-NEXT: lui a1, 244141
+; RV64I-NEXT: addiw a1, a1, -1520
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f7:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 244128
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 999948288
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB7_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB7_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: lui a0, 13
+; RV32I-NEXT: addi a0, a0, -1520
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: .cfi_def_cfa_offset 1000000016
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 9(sp)
+; RV32I-NEXT: lbu a0, 9(sp)
+; RV32I-NEXT: lui a1, 244141
+; RV32I-NEXT: addi a1, a1, -1520
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1000000007
+ %b = getelementptr inbounds i8, ptr %a, i64 101
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+; alloca + align < probe_size
+define i32 @f8(i64 %i) local_unnamed_addr #0 {
+; RV64I-LABEL: f8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -832
+; RV64I-NEXT: .cfi_def_cfa_offset 832
+; RV64I-NEXT: sd ra, 824(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 816(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: addi s0, sp, 832
+; RV64I-NEXT: .cfi_def_cfa s0, 0
+; RV64I-NEXT: andi sp, sp, -64
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: sw a1, 0(a0)
+; RV64I-NEXT: lw a0, 0(sp)
+; RV64I-NEXT: addi sp, s0, -832
+; RV64I-NEXT: .cfi_def_cfa sp, 832
+; RV64I-NEXT: ld ra, 824(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 816(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: addi sp, sp, 832
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -832
+; RV32I-NEXT: .cfi_def_cfa_offset 832
+; RV32I-NEXT: sw ra, 828(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 824(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: addi s0, sp, 832
+; RV32I-NEXT: .cfi_def_cfa s0, 0
+; RV32I-NEXT: andi sp, sp, -64
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: addi sp, s0, -832
+; RV32I-NEXT: .cfi_def_cfa sp, 832
+; RV32I-NEXT: lw ra, 828(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 824(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: addi sp, sp, 832
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+ %a = alloca i32, i32 200, align 64
+ %b = getelementptr inbounds i32, ptr %a, i64 %i
+ store volatile i32 1, ptr %b
+ %c = load volatile i32, ptr %a
+ ret i32 %c
+}
+
+; alloca > probe_size, align > probe_size
+define i32 @f9(i64 %i) local_unnamed_addr #0 {
+; RV64I-LABEL: f9:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: addi s0, sp, 2032
+; RV64I-NEXT: .cfi_def_cfa s0, 0
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: sub sp, sp, a1
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: sub sp, sp, a1
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: andi sp, sp, -2048
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: addi a1, sp, 2047
+; RV64I-NEXT: addi a1, a1, 1
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: sw a1, 0(a0)
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: add a0, sp, a0
+; RV64I-NEXT: lw a0, -2048(a0)
+; RV64I-NEXT: addi sp, s0, -2032
+; RV64I-NEXT: .cfi_def_cfa sp, 2032
+; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: addi sp, sp, 2032
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f9:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: addi s0, sp, 2032
+; RV32I-NEXT: .cfi_def_cfa s0, 0
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: andi sp, sp, -2048
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: addi a1, sp, 2047
+; RV32I-NEXT: addi a1, a1, 1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: lw a0, -2048(a0)
+; RV32I-NEXT: addi sp, s0, -2032
+; RV32I-NEXT: .cfi_def_cfa sp, 2032
+; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: addi sp, sp, 2032
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+ %a = alloca i32, i32 2000, align 2048
+ %b = getelementptr inbounds i32, ptr %a, i64 %i
+ store volatile i32 1, ptr %b
+ %c = load volatile i32, ptr %a
+ ret i32 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
More information about the llvm-commits
mailing list