[llvm] [Hexagon] Add probe-stack=inline-asm support for stack clash protection (PR #190568)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 5 18:13:56 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-hexagon
Author: Brian Cain (androm3da)
<details>
<summary>Changes</summary>
Large stack allocations can skip over guard pages, causing stack clash vulnerabilities. The probe-stack=inline-asm function attribute tells LLVM to emit inline probing code that touches each page during stack allocation, ensuring guard pages are hit.
`framelimit` provides hardware bounds checking, but only on the allocframe instruction itself. SP decrements via A2_addi -- used for frames >= 16k bytes and in all no-FP prologues -- bypass it. Software probing closes that gap.
When the attribute is present and the frame size exceeds the probe size, the prologue now emits a PS_probed_stackalloc pseudo that inlineStackProbe() expands into a compare-and-branch loop:
r29 = add(r29, #-ProbeSize)
memw(r29+#0) = #0
p0 = cmp.gtu(r29, r28)
if (p0) jump LoopMBB
r29 = r28
Both the frame-pointer and no-frame-pointer prologue paths are handled. The stack-probe-size attribute is respected for custom probe sizes.
---
Full diff: https://github.com/llvm/llvm-project/pull/190568.diff
6 Files Affected:
- (modified) llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp (+144-4)
- (modified) llvm/lib/Target/Hexagon/HexagonFrameLowering.h (+3)
- (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.cpp (+18)
- (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.h (+3)
- (modified) llvm/lib/Target/Hexagon/HexagonPseudo.td (+6)
- (added) llvm/test/CodeGen/Hexagon/stack-probing.ll (+166)
``````````diff
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 0f73efc243986..2bd40d0eb6f28 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -9,6 +9,7 @@
#include "HexagonFrameLowering.h"
#include "HexagonBlockRanges.h"
+#include "HexagonISelLowering.h"
#include "HexagonInstrInfo.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
@@ -746,9 +747,29 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
.addExternalSymbol("__runtime_stack_check");
} else if (NumBytes > 0) {
assert(alignTo(NumBytes, 8) == NumBytes);
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
- .addReg(SP)
- .addImm(-int(NumBytes));
+ auto *TLI = HST.getTargetLowering();
+ bool NeedsProbing = TLI->hasInlineStackProbe(MF);
+ unsigned ProbeSize = 0;
+ if (NeedsProbing) {
+ Align StackAlign = getStackAlign();
+ ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
+ }
+ if (NeedsProbing && NumBytes > ProbeSize) {
+ // Compute target SP in R28 (caller-saved scratch).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), Hexagon::R28)
+ .addReg(SP)
+ .addImm(-int(NumBytes))
+ .setMIFlag(MachineInstr::FrameSetup);
+ // Emit pseudo to be expanded by inlineStackProbe().
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_probed_stackalloc))
+ .addReg(Hexagon::R28)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(-int(NumBytes))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
}
@@ -892,7 +913,34 @@ void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
DebugLoc dl = MBB.findDebugLoc(InsertPt);
Register SP = HRI.getStackRegister();
- if (NumBytes >= ALLOCFRAME_MAX) {
+ auto *TLI = HST.getTargetLowering();
+ bool NeedsProbing = TLI->hasInlineStackProbe(MF) && NumBytes > 0;
+ unsigned ProbeSize = 0;
+ if (NeedsProbing) {
+ Align StackAlign = getStackAlign();
+ ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
+ }
+
+ if (NeedsProbing && NumBytes > ProbeSize) {
+ // Emit allocframe(#0) to save FP/LR only.
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addDef(SP)
+ .addReg(SP)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Compute target SP in R28 (caller-saved scratch).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), Hexagon::R28)
+ .addReg(SP)
+ .addImm(-int(NumBytes))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Emit pseudo to be expanded by inlineStackProbe().
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_probed_stackalloc))
+ .addReg(Hexagon::R28)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else if (NumBytes >= ALLOCFRAME_MAX) {
// Emit allocframe(#0).
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
.addDef(SP)
@@ -917,6 +965,98 @@ void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
}
}
+void HexagonFrameLowering::inlineStackProbe(
+ MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
+ // Collect PS_probed_stackalloc pseudos to expand. Collecting first avoids
+ // issues with modifying the block while iterating.
+ SmallVector<MachineInstr *, 2> ToReplace;
+ for (MachineInstr &MI : PrologueMBB)
+ if (MI.getOpcode() == Hexagon::PS_probed_stackalloc)
+ ToReplace.push_back(&MI);
+
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto *TLI = HST.getTargetLowering();
+ Align StackAlign = getStackAlign();
+ unsigned ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
+
+ for (MachineInstr *MI : ToReplace) {
+ MachineBasicBlock::iterator MBBI = MI->getIterator();
+ DebugLoc DL = PrologueMBB.findDebugLoc(MBBI);
+ Register TargetReg = MI->getOperand(0).getReg();
+
+ // Split the block: everything after the pseudo goes into ExitMBB.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction::iterator InsertPt = std::next(MBB->getIterator());
+ MachineBasicBlock *LoopMBB =
+ MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(InsertPt, LoopMBB);
+ MachineBasicBlock *ExitMBB =
+ MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(InsertPt, ExitMBB);
+
+ // Move everything after the pseudo into ExitMBB.
+ ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // LoopMBB: probe each page by decrementing SP and storing zero.
+ // When NumBytes is not an exact multiple of ProbeSize the loop
+ // will overshoot by up to ProbeSize-1 bytes; the final r29 = r28
+ // in ExitMBB corrects SP to the true target.
+ //
+ // The store is placed before the compare+branch so that the
+ // packetizer can bundle them into a single VLIW packet. All
+ // non-predicated instructions in a packet commit unconditionally,
+ // so the probe store executes on every iteration including the
+ // last (when the branch falls through).
+ //
+ // r29 = add(r29, #-ProbeSize)
+ // memw(r29+#0) = #0
+ // p0 = cmp.gtu(r29, r28)
+ // if (p0) jump LoopMBB
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::A2_addi),
+ Hexagon::R29)
+ .addReg(Hexagon::R29)
+ .addImm(-int(ProbeSize))
+ .setMIFlags(Flags);
+
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::S4_storeiri_io))
+ .addReg(Hexagon::R29)
+ .addImm(0)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::C2_cmpgtu),
+ Hexagon::P0)
+ .addReg(Hexagon::R29)
+ .addReg(TargetReg)
+ .setMIFlags(Flags);
+
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::J2_jumpt))
+ .addReg(Hexagon::P0)
+ .addMBB(LoopMBB)
+ .setMIFlags(Flags);
+
+ // ExitMBB: set final SP.
+ BuildMI(*ExitMBB, ExitMBB->begin(), DL, HII.get(Hexagon::A2_tfr),
+ Hexagon::R29)
+ .addReg(TargetReg)
+ .setMIFlags(Flags);
+
+ // Set up CFG edges.
+ MBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+
+ // Remove the pseudo.
+ MI->eraseFromParent();
+
+ // Recompute live-ins for the new blocks.
+ fullyRecomputeLiveIns({ExitMBB, LoopMBB});
+ }
+}
+
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
MachineBasicBlock &SaveB) const {
SetVector<unsigned> Worklist;
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 926aadb01f50e..285e6a5896461 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -113,6 +113,9 @@ class HexagonFrameLowering : public TargetFrameLowering {
void insertCFIInstructions(MachineFunction &MF) const;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
+
protected:
bool hasFPImpl(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 6b2a963e2e777..a81fd20d715f2 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -4004,3 +4004,21 @@ bool HexagonTargetLowering::isUsedByReturnOnly(SDNode *N,
Chain = Copy->getOperand(0);
return true;
}
+
+bool HexagonTargetLowering::hasInlineStackProbe(
+ const MachineFunction &MF) const {
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
+unsigned HexagonTargetLowering::getStackProbeSize(const MachineFunction &MF,
+ Align StackAlign) const {
+ const Function &Fn = MF.getFunction();
+ unsigned StackProbeSize =
+ Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
+ // Round down to the stack alignment.
+ StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
+ return StackProbeSize ? StackProbeSize : StackAlign.value();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 55be8fd043ad5..60cf38a6abcc7 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -287,6 +287,9 @@ class HexagonTargetLowering : public TargetLowering {
return AtomicExpansionKind::LLSC;
}
+ bool hasInlineStackProbe(const MachineFunction &MF) const override;
+ unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const;
+
private:
void initializeHVXLowering();
unsigned getPreferredHvxVectorAction(MVT VecTy) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index 4e5e8c3d26f00..fc74d8e2d7882 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -335,6 +335,12 @@ let Defs = [R29], hasSideEffects = 1 in
def PS_alloca: Pseudo <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, u32_0Imm:$A), "", []>;
+// Probed stack allocation pseudo. Expanded in inlineStackProbe() into a
+// compare-and-branch loop that touches each page.
+let Defs = [R29, P0], Uses = [R29], hasSideEffects = 1, mayStore = 1,
+ isCodeGenOnly = 1 in
+def PS_probed_stackalloc : Pseudo<(outs), (ins IntRegs:$target), "", []>;
+
// Load predicate.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
diff --git a/llvm/test/CodeGen/Hexagon/stack-probing.ll b/llvm/test/CodeGen/Hexagon/stack-probing.ll
new file mode 100644
index 0000000000000..a59556a782a53
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/stack-probing.ll
@@ -0,0 +1,166 @@
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+;; Small frame (< probe size): no probing loop, normal allocframe.
+; CHECK-LABEL: small_frame:
+; CHECK: allocframe(r29,#128):raw
+; CHECK-NOT: cmp.gtu
+; CHECK: dealloc_return
+define void @small_frame() #0 {
+entry:
+ %a = alloca [128 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; Large frame (> probe size): probing loop emitted.
+; CHECK-LABEL: large_frame:
+; CHECK: allocframe(r29,#0):raw
+; CHECK: r28 = add(r29,#-8192)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-4096)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+define void @large_frame() #0 {
+entry:
+ %a = alloca [8192 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; Exact multiple of probe size: probing loop still emitted.
+; CHECK-LABEL: exact_multiple:
+; CHECK: allocframe(r29,#0):raw
+; CHECK: r28 = add(r29,#-12288)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-4096)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+define void @exact_multiple() #0 {
+entry:
+ %a = alloca [12288 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; No frame pointer path with no call: probing works without allocframe.
+; CHECK-LABEL: no_fp_large:
+; CHECK-NOT: allocframe
+; CHECK: r28 = add(r29,#-8192)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-4096)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+define void @no_fp_large() #1 {
+entry:
+ %a = alloca [8192 x i8], align 1
+ store volatile i8 0, ptr %a
+ ret void
+}
+
+;; Custom probe size of 512 bytes.
+; CHECK-LABEL: custom_probe_size:
+; CHECK: allocframe(r29,#0):raw
+; CHECK: r28 = add(r29,#-8192)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-512)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+define void @custom_probe_size() #2 {
+entry:
+ %a = alloca [8192 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; No probe attribute: normal codegen, no probing.
+; CHECK-LABEL: no_probe:
+; CHECK: allocframe
+; CHECK-NOT: cmp.gtu
+; CHECK: dealloc_return
+define void @no_probe() {
+entry:
+ %a = alloca [8192 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; Frame >= ALLOCFRAME_MAX (16384): allocframe(#0) + probed alloc.
+; CHECK-LABEL: very_large_frame:
+; CHECK: allocframe(r29,#0):raw
+; CHECK: r28 = add(r29,#-20480)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-4096)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+define void @very_large_frame() #0 {
+entry:
+ %a = alloca [20480 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; Frame == probe size exactly: no probing loop, normal allocframe.
+; CHECK-LABEL: exact_probe_size:
+; CHECK: allocframe(r29,#4096):raw
+; CHECK-NOT: cmp.gtu
+; CHECK: dealloc_return
+define void @exact_probe_size() #0 {
+entry:
+ %a = alloca [4096 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; Large frame requiring constant-extended immediate (> 32767).
+; CHECK-LABEL: const_extd_frame:
+; CHECK: allocframe(r29,#0):raw
+; CHECK: r28 = add(r29,##-65536)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-4096)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+define void @const_extd_frame() #0 {
+entry:
+ %a = alloca [65536 x i8], align 1
+ call void @use(ptr %a)
+ ret void
+}
+
+;; Callee-saved register spills coexist with probing.
+; CHECK-LABEL: callee_saved_regs:
+; CHECK: allocframe(r29,#0):raw
+; CHECK: r28 = add(r29,#-8216)
+; CHECK: .LBB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK: r29 = add(r29,#-4096)
+; CHECK: p0 = cmp.gtu(r29,r28)
+; CHECK: if (p0.new) jump:t .LBB
+; CHECK: memw(r29+#0) = #0
+; CHECK: r29 = r28
+; CHECK: memd(r29+##{{[0-9]+}}) = r{{[0-9]+}}:{{[0-9]+}}
+; CHECK: dealloc_return
+define void @callee_saved_regs(ptr %p) #0 {
+entry:
+ %a = alloca [8192 x i8], align 1
+ call void @use(ptr %a)
+ call void asm sideeffect "", "~{r16},~{r17},~{r18},~{r19}"()
+ call void @use(ptr %p)
+ ret void
+}
+
+declare void @use(ptr)
+
+attributes #0 = { "probe-stack"="inline-asm" }
+attributes #1 = { nounwind "frame-pointer"="none" "probe-stack"="inline-asm" }
+attributes #2 = { "probe-stack"="inline-asm" "stack-probe-size"="512" }
``````````
</details>
https://github.com/llvm/llvm-project/pull/190568
More information about the llvm-commits
mailing list