[llvm] c352e08 - [PowerPC] Implement probing for prologue
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 3 20:31:17 PDT 2020
Author: Kai Luo
Date: 2020-07-04T03:07:08Z
New Revision: c352e0885a6b402e5d92cb82f0d0c4e933626b45
URL: https://github.com/llvm/llvm-project/commit/c352e0885a6b402e5d92cb82f0d0c4e933626b45
DIFF: https://github.com/llvm/llvm-project/commit/c352e0885a6b402e5d92cb82f0d0c4e933626b45.diff
LOG: [PowerPC] Implement probing for prologue
This patch is part of supporting `-fstack-clash-protection`: it implements stack
probing when emitting the function prologue.
Differential Revision: https://reviews.llvm.org/D81460
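
At the IR level the probing is driven by function attributes: a function carrying
"probe-stack"="inline-asm" gets inline probes in its prologue, and an optional
"stack-probe-size" attribute overrides the probe interval (the CHECK lines of the
new test imply a 4096-byte default). A minimal IR sketch that opts in, modeled on
the added test; the function name and the sizes below are illustrative only:

define void @probed() nounwind "probe-stack"="inline-asm" "stack-probe-size"="65536" {
entry:
  ; Large static frame: the prologue must touch the stack at each 65536-byte step.
  %buf = alloca i8, i64 1048576
  store volatile i8 0, i8* %buf
  ret void
}

Running such a function through llc with -mtriple=powerpc64le-linux-gnu should
produce the stdu/stdux probe sequence (possibly a CTR loop) checked for in
stack-clash-prologue.ll below.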
Added:
llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
Modified:
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
llvm/lib/Target/PowerPC/PPCFrameLowering.h
llvm/lib/Target/PowerPC/PPCInstr64Bit.td
llvm/lib/Target/PowerPC/PPCInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 2c95b97bafc1..93ea3154e47f 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -31,6 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+STATISTIC(NumPrologProbed, "Number of prologues probed");
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
@@ -757,6 +759,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -845,9 +848,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- bool SingleScratchReg =
- findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
- &ScratchReg, &TempReg);
+ // Stack probing requires two scratch registers: one to hold the old SP and
+ // one to handle large frames and large probe sizes.
+ bool SingleScratchReg = findScratchRegister(
+ &MBB, false,
+ twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
+ &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -1023,58 +1029,81 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// the negated frame size will be placed in ScratchReg.
bool HasSTUX = false;
- // This condition must be kept in sync with canUseAsPrologue.
- if (HasBP && MaxAlign > 1) {
- if (isPPC64)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(64 - Log2(MaxAlign));
- else // PPC32...
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(32 - Log2(MaxAlign))
- .addImm(31);
- if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
+ // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the PowerPC
+ // ABI requires the backchain pointer to always be stored at SP, so the
+ // mandatory STU(X) instruction already probes the newly allocated stack.
+ if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
+ // To be consistent with other targets, a pseudo instruction is emitted here
+ // and will later be expanded in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, dl,
+ TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
+ : PPC::PROBED_STACKALLOC_32))
+ .addDef(ScratchReg)
+ .addDef(TempReg) // TempReg stores the old sp.
.addImm(NegFrameSize);
- } else {
- assert(!SingleScratchReg && "Only a single scratch reg available");
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
- .addReg(TempReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addReg(TempReg, RegState::Kill);
+ // FIXME: HasSTUX is only read if HasRedZone is not set; in that case, we
+ // update ScratchReg to satisfy the assumption that ScratchReg contains
+ // the NegFrameSize. This solution is rather tricky.
+ if (!HasRedZone) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
+ .addReg(TempReg)
+ .addReg(SPReg);
+ HasSTUX = true;
}
+ } else {
+ // This condition must be kept in sync with canUseAsPrologue.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ assert(!SingleScratchReg && "Only a single scratch reg available");
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
+ }
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
- } else if (!isLargeFrame) {
- BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
- .addReg(SPReg)
- .addImm(NegFrameSize)
- .addReg(SPReg);
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
- } else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
+ }
}
// Save the TOC register after the stack pointer update if a prologue TOC
@@ -1335,6 +1364,142 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ // TODO: Generate CFI instructions.
+ bool isPPC64 = Subtarget.isPPC64();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+ int Opc = MI.getOpcode();
+ return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
+ });
+ if (StackAllocMIPos == PrologMBB.end())
+ return;
+ const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
+ MachineInstr &MI = *StackAllocMIPos;
+ int64_t NegFrameSize = MI.getOperand(2).getImm();
+ int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
+ int64_t NumBlocks = NegFrameSize / NegProbeSize;
+ int64_t NegResidualSize = NegFrameSize % NegProbeSize;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register ScratchReg = MI.getOperand(0).getReg();
+ Register FPReg = MI.getOperand(1).getReg();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ Align MaxAlign = MFI.getMaxAlign();
+ // Initialize current frame pointer.
+ const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
+ BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Subroutine to determine if we can use the Imm as part of a d-form instruction.
+ auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
+ // Subroutine to materialize the Imm into TempReg.
+ auto MaterializeImm = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t Imm,
+ Register &TempReg) {
+ assert(isInt<32>(Imm) && "Unhandled imm");
+ if (isInt<16>(Imm))
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
+ .addImm(Imm);
+ else {
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+ .addImm(Imm >> 16);
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
+ .addReg(TempReg)
+ .addImm(Imm & 0xFFFF);
+ }
+ };
+ // Subroutine to store frame pointer and decrease stack pointer by probe size.
+ auto allocateAndProbe = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t NegSize,
+ Register NegSizeReg, bool UseDForm) {
+ if (UseDForm)
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
+ .addReg(FPReg)
+ .addImm(NegSize)
+ .addReg(SPReg);
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(NegSizeReg);
+ };
+ // For case HasBP && MaxAlign > 1, we have to align the SP by performing
+ // SP = SP - SP % MaxAlign.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
+ SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm);
+ }
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
+ } else {
+ // Since CTR is a volatile register and the current shrink-wrapping
+ // implementation won't choose an MBB inside a loop as the PrologMBB, it's
+ // safe to synthesize a CTR loop to probe.
+ // Calculate the trip count and store it in CTR.
+ MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(PrologMBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &PrologMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ PrologMBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
+ PrologMBB.addSuccessor(LoopMBB);
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+ }
+ ++NumPrologProbed;
+ MI.eraseFromParent();
+}
+
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index dd832ffba643..8bf52c0ed01a 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -100,6 +100,8 @@ class PPCFrameLowering: public TargetFrameLowering {
/// the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool needsFP(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 25001ad8ace3..1c457d4170d5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -434,6 +434,9 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
g8rc:$sp),
(ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
+def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
+ (ins i64imm:$stacksize),
+ "#PROBED_STACKALLOC_64", []>;
}
let hasSideEffects = 0 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 9c986fc52b68..673ab63039cf 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1409,6 +1409,9 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
gprc:$sp),
(ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
+def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
+ (ins i64imm:$stacksize),
+ "#PROBED_STACKALLOC_32", []>;
}
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
new file mode 100644
index 000000000000..e595d8a732a5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck \
+; RUN: -check-prefix=CHECK-LE %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN: -mtriple=powerpc64-linux-gnu < %s | FileCheck \
+; RUN: -check-prefix=CHECK-BE %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN: -mtriple=powerpc-linux-gnu < %s | FileCheck \
+; RUN: -check-prefix=CHECK-32 %s
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; CHECK-LE-LABEL: f0:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, -64(r1)
+; CHECK-LE-NEXT: lbz r3, -64(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f0:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, -64(r1)
+; CHECK-BE-NEXT: lbz r3, -64(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f0:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: stwu r1, -80(r1)
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: addi r1, r1, 80
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 "stack-probe-size"="0" nounwind {
+; CHECK-LE-LABEL: f1:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mr r12, r1
+; CHECK-LE-NEXT: li r0, 259
+; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: .LBB1_1: # %entry
+; CHECK-LE-NEXT: #
+; CHECK-LE-NEXT: stdu r12, -16(r1)
+; CHECK-LE-NEXT: bdnz .LBB1_1
+; CHECK-LE-NEXT: # %bb.2: # %entry
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 48(r1)
+; CHECK-LE-NEXT: lbz r3, 48(r1)
+; CHECK-LE-NEXT: addi r1, r1, 4144
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mr r12, r1
+; CHECK-BE-NEXT: li r0, 260
+; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: .LBB1_1: # %entry
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: stdu r12, -16(r1)
+; CHECK-BE-NEXT: bdnz .LBB1_1
+; CHECK-BE-NEXT: # %bb.2: # %entry
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 64(r1)
+; CHECK-BE-NEXT: lbz r3, 64(r1)
+; CHECK-BE-NEXT: addi r1, r1, 4160
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f1:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: mr r12, r1
+; CHECK-32-NEXT: li r0, 257
+; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: .LBB1_1: # %entry
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: bdnz .LBB1_1
+; CHECK-32-NEXT: # %bb.2: # %entry
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: addi r1, r1, 4112
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 4096
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+define i8 @f2() #0 nounwind {
+; CHECK-LE-LABEL: f2:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mr r12, r1
+; CHECK-LE-NEXT: stdu r12, -48(r1)
+; CHECK-LE-NEXT: li r0, 16
+; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: .LBB2_1: # %entry
+; CHECK-LE-NEXT: #
+; CHECK-LE-NEXT: stdu r12, -4096(r1)
+; CHECK-LE-NEXT: bdnz .LBB2_1
+; CHECK-LE-NEXT: # %bb.2: # %entry
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 48(r1)
+; CHECK-LE-NEXT: lbz r3, 48(r1)
+; CHECK-LE-NEXT: ld r1, 0(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mr r12, r1
+; CHECK-BE-NEXT: stdu r12, -64(r1)
+; CHECK-BE-NEXT: li r0, 16
+; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: .LBB2_1: # %entry
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: stdu r12, -4096(r1)
+; CHECK-BE-NEXT: bdnz .LBB2_1
+; CHECK-BE-NEXT: # %bb.2: # %entry
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 64(r1)
+; CHECK-BE-NEXT: lbz r3, 64(r1)
+; CHECK-BE-NEXT: ld r1, 0(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f2:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: mr r12, r1
+; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: li r0, 16
+; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: .LBB2_1: # %entry
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stwu r12, -4096(r1)
+; CHECK-32-NEXT: bdnz .LBB2_1
+; CHECK-32-NEXT: # %bb.2: # %entry
+; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: mr r0, r31
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: lwz r31, 0(r1)
+; CHECK-32-NEXT: mr r1, r31
+; CHECK-32-NEXT: mr r31, r0
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
+; CHECK-LE-LABEL: f3:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mr r12, r1
+; CHECK-LE-NEXT: stdu r12, -48(r1)
+; CHECK-LE-NEXT: stdu r12, -32768(r1)
+; CHECK-LE-NEXT: stdu r12, -32768(r1)
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 48(r1)
+; CHECK-LE-NEXT: lbz r3, 48(r1)
+; CHECK-LE-NEXT: ld r1, 0(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mr r12, r1
+; CHECK-BE-NEXT: stdu r12, -64(r1)
+; CHECK-BE-NEXT: stdu r12, -32768(r1)
+; CHECK-BE-NEXT: stdu r12, -32768(r1)
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 64(r1)
+; CHECK-BE-NEXT: lbz r3, 64(r1)
+; CHECK-BE-NEXT: ld r1, 0(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f3:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: mr r12, r1
+; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: stwu r12, -32768(r1)
+; CHECK-32-NEXT: stwu r12, -32768(r1)
+; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: mr r0, r31
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: lwz r31, 0(r1)
+; CHECK-32-NEXT: mr r1, r31
+; CHECK-32-NEXT: mr r31, r0
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+; Same as f2, but without protection.
+define i8 @f4() nounwind {
+; CHECK-LE-LABEL: f4:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lis r0, -2
+; CHECK-LE-NEXT: ori r0, r0, 65488
+; CHECK-LE-NEXT: stdux r1, r1, r0
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 48(r1)
+; CHECK-LE-NEXT: lbz r3, 48(r1)
+; CHECK-LE-NEXT: ld r1, 0(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f4:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lis r0, -2
+; CHECK-BE-NEXT: ori r0, r0, 65472
+; CHECK-BE-NEXT: stdux r1, r1, r0
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 64(r1)
+; CHECK-BE-NEXT: lbz r3, 64(r1)
+; CHECK-BE-NEXT: ld r1, 0(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f4:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: lis r0, -2
+; CHECK-32-NEXT: ori r0, r0, 65520
+; CHECK-32-NEXT: stwux r1, r1, r0
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: mr r0, r31
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: lwz r31, 0(r1)
+; CHECK-32-NEXT: mr r1, r31
+; CHECK-32-NEXT: mr r31, r0
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 65536
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
+; CHECK-LE-LABEL: f5:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mr r12, r1
+; CHECK-LE-NEXT: stdu r12, -48(r1)
+; CHECK-LE-NEXT: li r0, 16
+; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: lis r0, -1
+; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: .LBB5_1: # %entry
+; CHECK-LE-NEXT: #
+; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: bdnz .LBB5_1
+; CHECK-LE-NEXT: # %bb.2: # %entry
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 48(r1)
+; CHECK-LE-NEXT: lbz r3, 48(r1)
+; CHECK-LE-NEXT: ld r1, 0(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f5:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mr r12, r1
+; CHECK-BE-NEXT: stdu r12, -64(r1)
+; CHECK-BE-NEXT: li r0, 16
+; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: lis r0, -1
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: .LBB5_1: # %entry
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: bdnz .LBB5_1
+; CHECK-BE-NEXT: # %bb.2: # %entry
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 64(r1)
+; CHECK-BE-NEXT: lbz r3, 64(r1)
+; CHECK-BE-NEXT: ld r1, 0(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f5:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: mr r12, r1
+; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: li r0, 16
+; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: lis r0, -1
+; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: .LBB5_1: # %entry
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: bdnz .LBB5_1
+; CHECK-32-NEXT: # %bb.2: # %entry
+; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: mr r0, r31
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: lwz r31, 0(r1)
+; CHECK-32-NEXT: mr r1, r31
+; CHECK-32-NEXT: mr r31, r0
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 1048576
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+define i8 @f6() #0 nounwind {
+; CHECK-LE-LABEL: f6:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mr r12, r1
+; CHECK-LE-NEXT: stdu r12, -48(r1)
+; CHECK-LE-NEXT: lis r0, 4
+; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: .LBB6_1: # %entry
+; CHECK-LE-NEXT: #
+; CHECK-LE-NEXT: stdu r12, -4096(r1)
+; CHECK-LE-NEXT: bdnz .LBB6_1
+; CHECK-LE-NEXT: # %bb.2: # %entry
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 48(r1)
+; CHECK-LE-NEXT: lbz r3, 48(r1)
+; CHECK-LE-NEXT: ld r1, 0(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f6:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mr r12, r1
+; CHECK-BE-NEXT: stdu r12, -64(r1)
+; CHECK-BE-NEXT: lis r0, 4
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: .LBB6_1: # %entry
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: stdu r12, -4096(r1)
+; CHECK-BE-NEXT: bdnz .LBB6_1
+; CHECK-BE-NEXT: # %bb.2: # %entry
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 64(r1)
+; CHECK-BE-NEXT: lbz r3, 64(r1)
+; CHECK-BE-NEXT: ld r1, 0(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f6:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: mr r12, r1
+; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: lis r0, 4
+; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: .LBB6_1: # %entry
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stwu r12, -4096(r1)
+; CHECK-32-NEXT: bdnz .LBB6_1
+; CHECK-32-NEXT: # %bb.2: # %entry
+; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: stb r3, 16(r1)
+; CHECK-32-NEXT: mr r0, r31
+; CHECK-32-NEXT: lbz r3, 16(r1)
+; CHECK-32-NEXT: lwz r31, 0(r1)
+; CHECK-32-NEXT: mr r1, r31
+; CHECK-32-NEXT: mr r31, r0
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 1073741824
+ %b = getelementptr inbounds i8, i8* %a, i64 63
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
+; CHECK-LE-LABEL: f7:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lis r0, -1
+; CHECK-LE-NEXT: mr r12, r1
+; CHECK-LE-NEXT: ori r0, r0, 13776
+; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: li r0, 15258
+; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: lis r0, -1
+; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: .LBB7_1: # %entry
+; CHECK-LE-NEXT: #
+; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: bdnz .LBB7_1
+; CHECK-LE-NEXT: # %bb.2: # %entry
+; CHECK-LE-NEXT: li r3, 3
+; CHECK-LE-NEXT: stb r3, 41(r1)
+; CHECK-LE-NEXT: lbz r3, 41(r1)
+; CHECK-LE-NEXT: ld r1, 0(r1)
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: f7:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lis r0, -1
+; CHECK-BE-NEXT: mr r12, r1
+; CHECK-BE-NEXT: ori r0, r0, 13760
+; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: li r0, 15258
+; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: lis r0, -1
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: .LBB7_1: # %entry
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: bdnz .LBB7_1
+; CHECK-BE-NEXT: # %bb.2: # %entry
+; CHECK-BE-NEXT: li r3, 3
+; CHECK-BE-NEXT: stb r3, 57(r1)
+; CHECK-BE-NEXT: lbz r3, 57(r1)
+; CHECK-BE-NEXT: ld r1, 0(r1)
+; CHECK-BE-NEXT: blr
+;
+; CHECK-32-LABEL: f7:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: lis r0, -1
+; CHECK-32-NEXT: mr r12, r1
+; CHECK-32-NEXT: ori r0, r0, 13808
+; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: li r0, 15258
+; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: lis r0, -1
+; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: .LBB7_1: # %entry
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: bdnz .LBB7_1
+; CHECK-32-NEXT: # %bb.2: # %entry
+; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r3, 3
+; CHECK-32-NEXT: sub r0, r1, r0
+; CHECK-32-NEXT: stb r3, 9(r1)
+; CHECK-32-NEXT: mr r0, r31
+; CHECK-32-NEXT: lbz r3, 9(r1)
+; CHECK-32-NEXT: lwz r31, 0(r1)
+; CHECK-32-NEXT: mr r1, r31
+; CHECK-32-NEXT: mr r31, r0
+; CHECK-32-NEXT: blr
+entry:
+ %a = alloca i8, i64 1000000007
+ %b = getelementptr inbounds i8, i8* %a, i64 101
+ store volatile i8 3, i8* %a
+ %c = load volatile i8, i8* %a
+ ret i8 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
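
As a worked reading of the CHECK-LE output for f2 above (a sketch of the
arithmetic inlineStackProbe performs; the 48-byte overhead on top of the
alloca is read off the test output, not derived here):

  FrameSize       = 65536 + 48 = 65584
  NegResidualSize = -(65584 % 4096) = -48  -> the leading stdu r12, -48(r1)
  NumBlocks       = 65584 / 4096 = 16      -> not < 3, so a CTR loop of
                                              16 x stdu r12, -4096(r1)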