[clang] [llvm] [RISCV] Add stack clash protection (PR #117612)
Raphael Moreira Zinsly via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 27 04:12:07 PST 2024
https://github.com/rzinsly updated https://github.com/llvm/llvm-project/pull/117612
>From f6bb44ca2242623399eb0ea946f38399fed3807c Mon Sep 17 00:00:00 2001
From: Raphael Moreira Zinsly <rzinsly at ventanamicro.com>
Date: Tue, 26 Nov 2024 16:40:37 -0300
Subject: [PATCH 1/3] [NFC][RISCV] Remove CFIIndex argument from
RISCVFrameLowering::allocateStack
Calculate CFIIndex inside RISCVFrameLowering::allocateStack instead of
passing it as an argument.
---
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 23 +++++++++-----------
llvm/lib/Target/RISCV/RISCVFrameLowering.h | 3 ++-
2 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 1ff435b76ad68a..e2c9baa1b7b1f6 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -612,8 +612,9 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- StackOffset Offset, bool EmitCFI,
- unsigned CFIIndex) const {
+ MachineFunction &MF, StackOffset Offset,
+ uint64_t RealStackSize,
+ bool EmitCFI) const {
DebugLoc DL;
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
@@ -622,7 +623,9 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
getStackAlign());
if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset StackSize"
+ // Emit ".cfi_def_cfa_offset RealStackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
@@ -745,10 +748,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
if (StackSize != 0) {
// Allocate space on the stack if necessary.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
- allocateStack(MBB, MBBI, StackOffset::getFixed(-StackSize),
- /*EmitCFI=*/ true, CFIIndex);
+ allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-StackSize),
+ RealStackSize, /*EmitCFI=*/ true);
}
// The frame pointer is callee-saved, and code has been generated for us to
@@ -790,12 +791,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
- // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
- // don't emit an sp-based .cfi_def_cfa_offset
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
- nullptr, getStackSizeWithRVVPadding(MF)));
- allocateStack(MBB, MBBI, StackOffset::getFixed(-SecondSPAdjustAmount),
- !hasFP(MF), CFIIndex);
+ allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-SecondSPAdjustAmount),
+ getStackSizeWithRVVPadding(MF), !hasFP(MF));
}
if (RVVStackSize) {
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 9aff4dc9e4089a..8c2e3f0f3e5638 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -79,7 +79,8 @@ class RISCVFrameLowering : public TargetFrameLowering {
}
void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- StackOffset Offset, bool EmitCFI, unsigned CFIIndex) const;
+ MachineFunction &MF, StackOffset Offset,
+ uint64_t RealStackSize, bool EmitCFI) const;
protected:
const RISCVSubtarget &STI;
>From 6a09d85096bdf08d0dfce5c1cb8b2cb22ec25eba Mon Sep 17 00:00:00 2001
From: Raphael Moreira Zinsly <rzinsly at ventanamicro.com>
Date: Mon, 25 Nov 2024 14:51:35 -0300
Subject: [PATCH 2/3] [RISCV] Add initial stack clash protection
Enable `-fstack-clash-protection` for RISCV and stack probe for function
prologues.
We probe the stack by creating an unrolled loop that allocates and probes
the stack in ProbeSize chunks. This is not ideal if the loop has many
iterations.
---
clang/lib/Driver/ToolChains/Clang.cpp | 3 +-
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 90 +++++++++++++++----
llvm/lib/Target/RISCV/RISCVFrameLowering.h | 5 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 22 +++++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 5 ++
.../Target/RISCV/RISCVMachineFunctionInfo.cpp | 30 +++++++
.../Target/RISCV/RISCVMachineFunctionInfo.h | 7 +-
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 4 +-
.../RISCV/stack-clash-prologue-nounwind.ll | 72 +++++++++++++++
.../CodeGen/RISCV/stack-clash-prologue.ll | 78 ++++++++++++++++
10 files changed, 291 insertions(+), 25 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
create mode 100644 llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d3eec9fea0d498..48fc43a8f968bc 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3774,7 +3774,8 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
- !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
+ !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64() &&
+ !EffectiveTriple.isRISCV())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index e2c9baa1b7b1f6..128b49eae3d846 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -610,26 +610,74 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
Comment.str());
}
+// Allocate stack space and probe it if necessary.
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineFunction &MF, StackOffset Offset,
- uint64_t RealStackSize,
- bool EmitCFI) const {
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI,
+ bool NeedProbe,
+ uint64_t ProbeSize) const {
DebugLoc DL;
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
- getStackAlign());
+ // Simply allocate the stack if it's not big enough to require a probe.
+ if (!NeedProbe || Offset <= ProbeSize) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
+ MachineInstr::FrameSetup, getStackAlign());
- if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset RealStackSize"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset RealStackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ return;
}
+
+ // Do an unrolled probe loop.
+ uint64_t CurrentOffset = 0;
+ bool IsRV64 = STI.is64Bit();
+ while (CurrentOffset + ProbeSize <= Offset) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
+ getStackAlign());
+ // s[d|w] zero, 0(sp)
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ CurrentOffset += ProbeSize;
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ uint64_t Residual = Offset - CurrentOffset;
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
+ MachineInstr::FrameSetup, getStackAlign());
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ return;
}
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
@@ -746,11 +794,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
getPushOrLibCallsSavedInfo(MF, CSI));
}
- if (StackSize != 0) {
- // Allocate space on the stack if necessary.
- allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-StackSize),
- RealStackSize, /*EmitCFI=*/ true);
- }
+ // Allocate space on the stack if necessary.
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ bool NeedProbe = TLI->hasInlineStackProbe(MF);
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
+ if (StackSize != 0)
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
+ NeedProbe, ProbeSize);
// The frame pointer is callee-saved, and code has been generated for us to
// save it to the stack. We need to skip over the storing of callee-saved
@@ -791,8 +842,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
- allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-SecondSPAdjustAmount),
- getStackSizeWithRVVPadding(MF), !hasFP(MF));
+ allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
+ getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
+ ProbeSize);
}
if (RVVStackSize) {
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 8c2e3f0f3e5638..6946e81dbd0675 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -79,8 +79,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
}
void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineFunction &MF, StackOffset Offset,
- uint64_t RealStackSize, bool EmitCFI) const;
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI, bool NeedProbe,
+ uint64_t ProbeSize) const;
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8ca3e714c32c28..8f06ceb0026889 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22156,3 +22156,25 @@ namespace llvm::RISCVVIntrinsicsTable {
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCVVIntrinsicsTable
+
+bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
+
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+
+ return false;
+}
+
+unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
+ Align StackAlign) const {
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ const Function &Fn = MF.getFunction();
+ unsigned StackProbeSize =
+ Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
+ // Round down to the stack alignment.
+ StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
+ return StackProbeSize ? StackProbeSize : StackAlign.value();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7ada941563c1ff..f77c9772970144 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -923,6 +923,11 @@ class RISCVTargetLowering : public TargetLowering {
MachineBasicBlock::instr_iterator &MBBI,
const TargetInstrInfo *TII) const override;
+ /// True if stack clash protection is enabled for this function.
+ bool hasInlineStackProbe(const MachineFunction &MF) const override;
+
+ unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
index d0c363042f5118..a0d79317638184 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVMachineFunctionInfo.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
@@ -26,6 +27,35 @@ MachineFunctionInfo *RISCVMachineFunctionInfo::clone(
return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this);
}
+RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(const Function &F,
+ const RISCVSubtarget *STI) {
+
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute. This is a safe default because it is the
+ // smallest possible guard page size.
+ uint64_t ProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
+ else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("stack-probe-size")))
+ ProbeSize = PS->getZExtValue();
+ assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
+
+ // Round down to the stack alignment.
+ uint64_t StackAlign =
+ STI->getFrameLowering()->getTransientStackAlign().value();
+ ProbeSize = std::max(StackAlign, alignDown(ProbeSize, StackAlign));
+ StringRef ProbeKind;
+ if (F.hasFnAttribute("probe-stack"))
+ ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
+ else if (const auto *PS = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("probe-stack")))
+ ProbeKind = PS->getString();
+ if (ProbeKind.size()) {
+ StackProbeSize = ProbeSize;
+ }
+}
+
void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this);
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 779c652b4d8fc4..cf20cf8a5c25c5 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -76,8 +76,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
unsigned RVPushRegs = 0;
int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST;
+ int64_t StackProbeSize = 0;
+
public:
- RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
+ RISCVMachineFunctionInfo(const Function &F, const RISCVSubtarget *STI);
MachineFunctionInfo *
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
@@ -157,6 +159,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
bool isVectorCall() const { return IsVectorCall; }
void setIsVectorCall() { IsVectorCall = true; }
+
+ bool hasStackProbing() const { return StackProbeSize != 0; }
+ int64_t getStackProbeSize() const { return StackProbeSize; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fa507653264ccd..53b178809ccac5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -266,8 +266,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
- return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(Allocator,
- F, STI);
+ return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(
+ Allocator, F, static_cast<const RISCVSubtarget *>(STI));
}
TargetTransformInfo
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
new file mode 100644
index 00000000000000..1cb134503fbd29
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; Tests copied from PowerPC.
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; RV64I-LABEL: f0:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -64
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 0(sp)
+; RV64I-NEXT: lbu a0, 0(sp)
+; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -64
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 0(sp)
+; RV32I-NEXT: lbu a0, 0(sp)
+; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 nounwind {
+; RV64I-LABEL: f1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 4096
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
new file mode 100644
index 00000000000000..2740d762ed930a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; Tests copied from PowerPC.
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; RV64I-LABEL: f0:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -64
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 0(sp)
+; RV64I-NEXT: lbu a0, 0(sp)
+; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -64
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 0(sp)
+; RV32I-NEXT: lbu a0, 0(sp)
+; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 {
+; RV64I-LABEL: f1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 4096
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 4112
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 4096
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 4112
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 4096
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
>From 34a0b7147f00439e97d0a29de73f3a2c1e52593d Mon Sep 17 00:00:00 2001
From: Raphael Moreira Zinsly <rzinsly at ventanamicro.com>
Date: Mon, 25 Nov 2024 16:11:51 -0300
Subject: [PATCH 3/3] [RISCV] Improve stack clash probe loop
Limit the unrolled probe loop and emit a variable-length probe loop
for bigger allocations.
We add a new pseudo instruction RISCV::PROBED_STACKALLOC that will
later be expanded into a probe loop by `inlineStackProbe`.
---
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 190 ++++++--
llvm/lib/Target/RISCV/RISCVFrameLowering.h | 3 +
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 11 +
.../RISCV/stack-clash-prologue-nounwind.ll | 273 +++++++++++
.../CodeGen/RISCV/stack-clash-prologue.ll | 459 ++++++++++++++++++
5 files changed, 906 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 128b49eae3d846..eae7f24392b5c8 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -638,46 +638,107 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
return;
}
- // Do an unrolled probe loop.
- uint64_t CurrentOffset = 0;
- bool IsRV64 = STI.is64Bit();
- while (CurrentOffset + ProbeSize <= Offset) {
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
- StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
- getStackAlign());
- // s[d|w] zero, 0(sp)
- BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
- .addReg(RISCV::X0)
- .addReg(SPReg)
- .addImm(0)
- .setMIFlags(MachineInstr::FrameSetup);
+ // Unroll the probe loop depending on the number of iterations.
+ if (Offset < ProbeSize * 5) {
+ uint64_t CurrentOffset = 0;
+ bool IsRV64 = STI.is64Bit();
+ while (CurrentOffset + ProbeSize <= Offset) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
+ getStackAlign());
+ // s[d|w] zero, 0(sp)
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ CurrentOffset += ProbeSize;
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
- CurrentOffset += ProbeSize;
- if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset CurrentOffset"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
+ uint64_t Residual = Offset - CurrentOffset;
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
+ getStackAlign());
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
+
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
+ uint64_t Residual = Offset - RoundedSize;
+
+ Register TargetReg = RISCV::X6;
+ // SUB TargetReg, SP, RoundedSize
+ RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
+ StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
+ getStackAlign());
+
+ if (EmitCFI) {
+ // Set the CFA register to TargetReg.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ // It will be expanded to a probe loop in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
+ .addReg(SPReg)
+ .addReg(TargetReg);
+
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
- uint64_t Residual = Offset - CurrentOffset;
if (Residual) {
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
MachineInstr::FrameSetup, getStackAlign());
- if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset Offset"
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (Residual > ProbeSize) {
+ // s[d|w] zero, 0(sp)
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
- return;
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
}
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
@@ -1988,3 +2049,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
+
+// Synthesize the probe loop.
+static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
+ bool IsRV64 = Subtarget.is64Bit();
+ Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
+ Register TargetReg = RISCV::X6;
+ Register ScratchReg = RISCV::X7;
+
+ // ScratchReg = ProbeSize
+ TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
+
+ // LoopTest:
+ // SUB SP, SP, ProbeSize
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg)
+ .setMIFlags(Flags);
+
+ // s[d|w] zero, 0(sp)
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
+ TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ // BNE SP, TargetReg, LoopTest
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
+ .addReg(SPReg)
+ .addReg(TargetReg)
+ .addMBB(LoopTestMBB)
+ .setMIFlags(Flags);
+
+ ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
+
+ LoopTestMBB->addSuccessor(ExitMBB);
+ LoopTestMBB->addSuccessor(LoopTestMBB);
+ MBB.addSuccessor(LoopTestMBB);
+}
+
+void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // Get the instructions that need to be replaced. We emit at most two of
+ // these. Remember them in order to avoid complications coming from the need
+ // to traverse the block while potentially creating more blocks.
+ auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
+ });
+ if (Where != MBB.end()) {
+ DebugLoc DL = MBB.findDebugLoc(Where);
+ emitStackProbeInline(MF, MBB, Where, DL);
+ Where->eraseFromParent();
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 6946e81dbd0675..f6ab5dc9ecfa37 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -107,6 +107,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction &MF) const;
+ // Replace a StackProbe stub (if any) with the actual probe code inline
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 5747f05ffafd47..a442a7c84ab340 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
(ADDI GPR:$rs1, simm12:$imm12)>;
+/// Stack probing
+
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+// Probed stack allocation of a constant size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
+ (ins GPR:$scratch),
+ []>,
+ Sched<[]>;
+}
+
/// HI and ADD_LO address nodes.
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
index 1cb134503fbd29..3b2d7f1f9a8ea4 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
@@ -69,4 +69,277 @@ entry:
ret i8 %c
}
+define i8 @f2() #0 nounwind {
+; RV64I-LABEL: f2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB2_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB2_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB2_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB2_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
+; RV64I-LABEL: f3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+; Same as f2, but without stack-clash protection (no "probe-stack" attribute).
+define i8 @f4() nounwind {
+; RV64I-LABEL: f4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
+; RV64I-LABEL: f5:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 256
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB5_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB5_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 256
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f5:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 256
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB5_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB5_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1048576
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f6() #0 nounwind {
+; RV64I-LABEL: f6:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 262144
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB6_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB6_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 262144
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f6:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 262144
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB6_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB6_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1073741824
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
+; RV64I-LABEL: f7:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 244128
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB7_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB7_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: lui a0, 13
+; RV64I-NEXT: addiw a0, a0, -1520
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 9(sp)
+; RV64I-NEXT: lbu a0, 9(sp)
+; RV64I-NEXT: lui a1, 244141
+; RV64I-NEXT: addiw a1, a1, -1520
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f7:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 244128
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB7_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB7_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: lui a0, 13
+; RV32I-NEXT: addi a0, a0, -1520
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 9(sp)
+; RV32I-NEXT: lbu a0, 9(sp)
+; RV32I-NEXT: lui a1, 244141
+; RV32I-NEXT: addi a1, a1, -1520
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1000000007
+ %b = getelementptr inbounds i8, ptr %a, i64 101
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
attributes #0 = { "probe-stack"="inline-asm" }
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
index 2740d762ed930a..36d58c900d2d34 100644
--- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll
@@ -75,4 +75,463 @@ entry:
ret i8 %c
}
+define i8 @f2() #0 {
+; RV64I-LABEL: f2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 65536
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB2_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB2_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 65552
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 65536
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB2_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB2_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 65552
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" {
+; RV64I-LABEL: f3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 32768
+; RV64I-NEXT: lui a0, 8
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 65536
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 65552
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 32768
+; RV32I-NEXT: lui a0, 8
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 65536
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 65552
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+; Same as f2, but without stack-clash protection (no "probe-stack" attribute).
+define i8 @f4() {
+; RV64I-LABEL: f4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 16
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 65552
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: .cfi_def_cfa_offset 65552
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" {
+; RV64I-LABEL: f5:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 256
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 1048576
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB5_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB5_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 1048592
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 256
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f5:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 256
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 1048576
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB5_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB5_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 1048592
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 256
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1048576
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f6() #0 {
+; RV64I-LABEL: f6:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 262144
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 1073741824
+; RV64I-NEXT: lui t2, 1
+; RV64I-NEXT: .LBB6_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB6_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 1073741840
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+; RV64I-NEXT: lui a1, 262144
+; RV64I-NEXT: addiw a1, a1, 16
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f6:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 262144
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 1073741824
+; RV32I-NEXT: lui t2, 1
+; RV32I-NEXT: .LBB6_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB6_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 1073741840
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 16(sp)
+; RV32I-NEXT: lbu a0, 16(sp)
+; RV32I-NEXT: lui a1, 262144
+; RV32I-NEXT: addi a1, a1, 16
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1073741824
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" {
+; RV64I-LABEL: f7:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 244128
+; RV64I-NEXT: sub t1, sp, a0
+; RV64I-NEXT: .cfi_def_cfa t1, 999948288
+; RV64I-NEXT: lui t2, 16
+; RV64I-NEXT: .LBB7_1: # %entry
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub sp, sp, t2
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: bne sp, t1, .LBB7_1
+; RV64I-NEXT: # %bb.2: # %entry
+; RV64I-NEXT: .cfi_def_cfa_register sp
+; RV64I-NEXT: lui a0, 13
+; RV64I-NEXT: addiw a0, a0, -1520
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 1000000016
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 9(sp)
+; RV64I-NEXT: lbu a0, 9(sp)
+; RV64I-NEXT: lui a1, 244141
+; RV64I-NEXT: addiw a1, a1, -1520
+; RV64I-NEXT: add sp, sp, a1
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f7:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, 244128
+; RV32I-NEXT: sub t1, sp, a0
+; RV32I-NEXT: .cfi_def_cfa t1, 999948288
+; RV32I-NEXT: lui t2, 16
+; RV32I-NEXT: .LBB7_1: # %entry
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub sp, sp, t2
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: bne sp, t1, .LBB7_1
+; RV32I-NEXT: # %bb.2: # %entry
+; RV32I-NEXT: .cfi_def_cfa_register sp
+; RV32I-NEXT: lui a0, 13
+; RV32I-NEXT: addi a0, a0, -1520
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: .cfi_def_cfa_offset 1000000016
+; RV32I-NEXT: li a0, 3
+; RV32I-NEXT: sb a0, 9(sp)
+; RV32I-NEXT: lbu a0, 9(sp)
+; RV32I-NEXT: lui a1, 244141
+; RV32I-NEXT: addi a1, a1, -1520
+; RV32I-NEXT: add sp, sp, a1
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+entry:
+ %a = alloca i8, i64 1000000007
+ %b = getelementptr inbounds i8, ptr %a, i64 101
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+; alloca + align < probe_size
+define i32 @f8(i64 %i) local_unnamed_addr #0 {
+; RV64I-LABEL: f8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -832
+; RV64I-NEXT: .cfi_def_cfa_offset 832
+; RV64I-NEXT: sd ra, 824(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 816(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: addi s0, sp, 832
+; RV64I-NEXT: .cfi_def_cfa s0, 0
+; RV64I-NEXT: andi sp, sp, -64
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: sw a1, 0(a0)
+; RV64I-NEXT: lw a0, 0(sp)
+; RV64I-NEXT: addi sp, s0, -832
+; RV64I-NEXT: .cfi_def_cfa sp, 832
+; RV64I-NEXT: ld ra, 824(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 816(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: addi sp, sp, 832
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -832
+; RV32I-NEXT: .cfi_def_cfa_offset 832
+; RV32I-NEXT: sw ra, 828(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 824(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: addi s0, sp, 832
+; RV32I-NEXT: .cfi_def_cfa s0, 0
+; RV32I-NEXT: andi sp, sp, -64
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: lw a0, 0(sp)
+; RV32I-NEXT: addi sp, s0, -832
+; RV32I-NEXT: .cfi_def_cfa sp, 832
+; RV32I-NEXT: lw ra, 828(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 824(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: addi sp, sp, 832
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+ %a = alloca i32, i32 200, align 64
+ %b = getelementptr inbounds i32, ptr %a, i64 %i
+ store volatile i32 1, ptr %b
+ %c = load volatile i32, ptr %a
+ ret i32 %c
+}
+
+; alloca > probe_size, align < probe_size
+define i32 @f9(i64 %i) local_unnamed_addr #0 {
+; RV64I-LABEL: f9:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: addi s0, sp, 2032
+; RV64I-NEXT: .cfi_def_cfa s0, 0
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: sub sp, sp, a1
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: sub sp, sp, a1
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: andi sp, sp, -2048
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: addi a1, sp, 2047
+; RV64I-NEXT: addi a1, a1, 1
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: sw a1, 0(a0)
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: add a0, sp, a0
+; RV64I-NEXT: lw a0, -2048(a0)
+; RV64I-NEXT: addi sp, s0, -2032
+; RV64I-NEXT: .cfi_def_cfa sp, 2032
+; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: .cfi_restore s0
+; RV64I-NEXT: addi sp, sp, 2032
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV32I-LABEL: f9:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: addi s0, sp, 2032
+; RV32I-NEXT: .cfi_def_cfa s0, 0
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: andi sp, sp, -2048
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: addi a1, sp, 2047
+; RV32I-NEXT: addi a1, a1, 1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: lw a0, -2048(a0)
+; RV32I-NEXT: addi sp, s0, -2032
+; RV32I-NEXT: .cfi_def_cfa sp, 2032
+; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore ra
+; RV32I-NEXT: .cfi_restore s0
+; RV32I-NEXT: addi sp, sp, 2032
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+ %a = alloca i32, i32 2000, align 2048
+ %b = getelementptr inbounds i32, ptr %a, i64 %i
+ store volatile i32 1, ptr %b
+ %c = load volatile i32, ptr %a
+ ret i32 %c
+}
+
attributes #0 = { "probe-stack"="inline-asm" }
More information about the llvm-commits
mailing list