[llvm] c352e08 - [PowerPC] Implement probing for prologue

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 3 20:31:17 PDT 2020


Author: Kai Luo
Date: 2020-07-04T03:07:08Z
New Revision: c352e0885a6b402e5d92cb82f0d0c4e933626b45

URL: https://github.com/llvm/llvm-project/commit/c352e0885a6b402e5d92cb82f0d0c4e933626b45
DIFF: https://github.com/llvm/llvm-project/commit/c352e0885a6b402e5d92cb82f0d0c4e933626b45.diff

LOG: [PowerPC] Implement probing for prologue

This patch is part of the support for `-fstack-clash-protection`. It implements
stack probing when emitting the function prologue.

Differential Revision: https://reviews.llvm.org/D81460

Added: 
    llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/lib/Target/PowerPC/PPCFrameLowering.h
    llvm/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/lib/Target/PowerPC/PPCInstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 2c95b97bafc1..93ea3154e47f 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPCFrameLowering.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
@@ -31,6 +32,7 @@ using namespace llvm;
 #define DEBUG_TYPE "framelowering"
 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+STATISTIC(NumPrologProbed, "Number of prologues probed");
 
 static cl::opt<bool>
 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
@@ -757,6 +759,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
 
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -845,9 +848,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
 
   // Using the same bool variable as below to suppress compiler warnings.
-  bool SingleScratchReg =
-    findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
-                        &ScratchReg, &TempReg);
+  // Stack probe requires two scratch registers, one for old sp, one for large
+  // frame and large probe size.
+  bool SingleScratchReg = findScratchRegister(
+      &MBB, false,
+      twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
+      &ScratchReg, &TempReg);
   assert(SingleScratchReg &&
          "Required number of registers not available in this block");
 
@@ -1023,58 +1029,81 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   // the negated frame size will be placed in ScratchReg.
   bool HasSTUX = false;
 
-  // This condition must be kept in sync with canUseAsPrologue.
-  if (HasBP && MaxAlign > 1) {
-    if (isPPC64)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
-          .addReg(SPReg)
-          .addImm(0)
-          .addImm(64 - Log2(MaxAlign));
-    else // PPC32...
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
-          .addReg(SPReg)
-          .addImm(0)
-          .addImm(32 - Log2(MaxAlign))
-          .addImm(31);
-    if (!isLargeFrame) {
-      BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
-        .addReg(ScratchReg, RegState::Kill)
+  // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
+  // pointer is always stored at SP, we will get a free probe due to an essential
+  // STU(X) instruction.
+  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
+    // To be consistent with other targets, a pseudo instruction is emitted and
+    // will be later expanded in `inlineStackProbe`.
+    BuildMI(MBB, MBBI, dl,
+            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
+                            : PPC::PROBED_STACKALLOC_32))
+        .addDef(ScratchReg)
+        .addDef(TempReg) // TempReg stores the old sp.
         .addImm(NegFrameSize);
-    } else {
-      assert(!SingleScratchReg && "Only a single scratch reg available");
-      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
-        .addImm(NegFrameSize >> 16);
-      BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
-        .addReg(TempReg, RegState::Kill)
-        .addImm(NegFrameSize & 0xFFFF);
-      BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
-        .addReg(ScratchReg, RegState::Kill)
-        .addReg(TempReg, RegState::Kill);
+    // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
+    // update the ScratchReg to meet the assumption that ScratchReg contains
+    // the NegFrameSize. This solution is rather tricky.
+    if (!HasRedZone) {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
+          .addReg(TempReg)
+          .addReg(SPReg);
+      HasSTUX = true;
     }
+  } else {
+    // This condition must be kept in sync with canUseAsPrologue.
+    if (HasBP && MaxAlign > 1) {
+      if (isPPC64)
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+            .addReg(SPReg)
+            .addImm(0)
+            .addImm(64 - Log2(MaxAlign));
+      else // PPC32...
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+            .addReg(SPReg)
+            .addImm(0)
+            .addImm(32 - Log2(MaxAlign))
+            .addImm(31);
+      if (!isLargeFrame) {
+        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+            .addReg(ScratchReg, RegState::Kill)
+            .addImm(NegFrameSize);
+      } else {
+        assert(!SingleScratchReg && "Only a single scratch reg available");
+        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+            .addImm(NegFrameSize >> 16);
+        BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+            .addReg(TempReg, RegState::Kill)
+            .addImm(NegFrameSize & 0xFFFF);
+        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+            .addReg(ScratchReg, RegState::Kill)
+            .addReg(TempReg, RegState::Kill);
+      }
 
-    BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
-      .addReg(SPReg, RegState::Kill)
-      .addReg(SPReg)
-      .addReg(ScratchReg);
-    HasSTUX = true;
+      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+          .addReg(SPReg, RegState::Kill)
+          .addReg(SPReg)
+          .addReg(ScratchReg);
+      HasSTUX = true;
 
-  } else if (!isLargeFrame) {
-    BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
-      .addReg(SPReg)
-      .addImm(NegFrameSize)
-      .addReg(SPReg);
+    } else if (!isLargeFrame) {
+      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
+          .addReg(SPReg)
+          .addImm(NegFrameSize)
+          .addReg(SPReg);
 
-  } else {
-    BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
-      .addImm(NegFrameSize >> 16);
-    BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
-      .addReg(ScratchReg, RegState::Kill)
-      .addImm(NegFrameSize & 0xFFFF);
-    BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
-      .addReg(SPReg, RegState::Kill)
-      .addReg(SPReg)
-      .addReg(ScratchReg);
-    HasSTUX = true;
+    } else {
+      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+          .addImm(NegFrameSize >> 16);
+      BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+          .addReg(ScratchReg, RegState::Kill)
+          .addImm(NegFrameSize & 0xFFFF);
+      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+          .addReg(SPReg, RegState::Kill)
+          .addReg(SPReg)
+          .addReg(ScratchReg);
+      HasSTUX = true;
+    }
   }
 
   // Save the TOC register after the stack pointer update if a prologue TOC
@@ -1335,6 +1364,142 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   }
 }
 
+void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
+                                        MachineBasicBlock &PrologMBB) const {
+  // Expands PROBED_STACKALLOC_32/64 in PrologMBB. TODO: Generate CFI.
+  bool isPPC64 = Subtarget.isPPC64();
+  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
+  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+    int Opc = MI.getOpcode();
+    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
+  });
+  if (StackAllocMIPos == PrologMBB.end())
+    return;
+  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
+  MachineInstr &MI = *StackAllocMIPos;
+  int64_t NegFrameSize = MI.getOperand(2).getImm();
+  int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
+  int64_t NumBlocks = NegFrameSize / NegProbeSize;
+  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
+  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+  Register ScratchReg = MI.getOperand(0).getReg();
+  Register FPReg = MI.getOperand(1).getReg();
+  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  bool HasBP = RegInfo->hasBasePointer(MF);
+  Align MaxAlign = MFI.getMaxAlign();
+  // Copy the incoming SP into FPReg; every probing store below writes FPReg.
+  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
+  BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+  // An Imm fits the D-form store only if it is a signed 16-bit multiple of 4.
+  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
+  // Load a signed 32-bit Imm into TempReg: LI if it fits 16 bits, else LIS+ORI.
+  auto MaterializeImm = [&](MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI, int64_t Imm,
+                            Register &TempReg) {
+    assert(isInt<32>(Imm) && "Unhandled imm");
+    if (isInt<16>(Imm))
+      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
+          .addImm(Imm);
+    else {
+      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+          .addImm(Imm >> 16);
+      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
+          .addReg(TempReg)
+          .addImm(Imm & 0xFFFF);
+    }
+  };
+  // Store FPReg and decrease SP by -NegSize (imm if UseDForm, else NegSizeReg).
+  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
+                              Register NegSizeReg, bool UseDForm) {
+    if (UseDForm)
+      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
+          .addReg(FPReg)
+          .addImm(NegSize)
+          .addReg(SPReg);
+    else
+      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+          .addReg(FPReg)
+          .addReg(SPReg)
+          .addReg(NegSizeReg);
+  };
+  // For case HasBP && MaxAlign > 1, SP must be aligned: SP = SP - SP % MaxAlign.
+  // NOTE(review): ScratchReg is not negated before the STDUX/STWUX; confirm sign.
+  if (HasBP && MaxAlign > 1) {
+    if (isPPC64)
+      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+          .addReg(FPReg)
+          .addImm(0)
+          .addImm(64 - Log2(MaxAlign));
+    else
+      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+          .addReg(FPReg)
+          .addImm(0)
+          .addImm(32 - Log2(MaxAlign))
+          .addImm(31);
+    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
+            SPReg)
+        .addReg(FPReg)
+        .addReg(SPReg)
+        .addReg(ScratchReg);
+  }
+  // Allocate and probe the residual (NegFrameSize % NegProbeSize) part first.
+  if (NegResidualSize) {
+    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+    if (!ResidualUseDForm)
+      MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
+    allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
+                     ResidualUseDForm);
+  }
+  bool UseDForm = CanUseDForm(NegProbeSize);
+  // With fewer than 3 probe-sized blocks, emit the probing stores unrolled.
+  if (NumBlocks < 3) {
+    if (!UseDForm)
+      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+    for (int i = 0; i < NumBlocks; ++i)
+      allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
+  } else {
+    // Since CTR is a volatile register and current shrinkwrap implementation
+    // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
+    // CTR loop to probe.
+    // Load the trip count (NumBlocks) into CTR via ScratchReg.
+    MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
+    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+        .addReg(ScratchReg, RegState::Kill);
+    if (!UseDForm)
+      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+    // Create LoopMBB and ExitMBB and insert them right after PrologMBB.
+    MachineFunction::iterator MBBInsertPoint =
+        std::next(PrologMBB.getIterator());
+    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+    MF.insert(MBBInsertPoint, LoopMBB);
+    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+    MF.insert(MBBInsertPoint, ExitMBB);
+    // Loop body: one probing store, then BDNZ back to LoopMBB.
+    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+                     UseDForm);
+    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+        .addMBB(LoopMBB);
+    LoopMBB->addSuccessor(ExitMBB);
+    LoopMBB->addSuccessor(LoopMBB);
+    // ExitMBB takes everything after MI in PrologMBB plus its successors.
+    ExitMBB->splice(ExitMBB->end(), &PrologMBB,
+                    std::next(MachineBasicBlock::iterator(MI)),
+                    PrologMBB.end());
+    ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
+    PrologMBB.addSuccessor(LoopMBB);
+    // Update liveins.
+    recomputeLiveIns(*LoopMBB);
+    recomputeLiveIns(*ExitMBB);
+  }
+  ++NumPrologProbed;
+  MI.eraseFromParent();
+}
+
 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index dd832ffba643..8bf52c0ed01a 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -100,6 +100,8 @@ class PPCFrameLowering: public TargetFrameLowering {
   /// the function.
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void inlineStackProbe(MachineFunction &MF,
+                        MachineBasicBlock &PrologMBB) const override;
 
   bool hasFP(const MachineFunction &MF) const override;
   bool needsFP(const MachineFunction &MF) const;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 25001ad8ace3..1c457d4170d5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -434,6 +434,9 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
 def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
     g8rc:$sp),
     (ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
+def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
+    (ins i64imm:$stacksize),
+    "#PROBED_STACKALLOC_64", []>;
 }
 
 let hasSideEffects = 0 in {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 9c986fc52b68..673ab63039cf 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1409,6 +1409,9 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
 def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
     gprc:$sp),
     (ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
+def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
+    (ins i64imm:$stacksize),
+    "#PROBED_STACKALLOC_32", []>;
 }
 
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
new file mode 100644
index 000000000000..e595d8a732a5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-linux-gnu < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-LE %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64-linux-gnu < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-BE %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc-linux-gnu < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-32 %s
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; CHECK-LE-LABEL: f0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, -64(r1)
+; CHECK-LE-NEXT:    lbz r3, -64(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f0:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, -64(r1)
+; CHECK-BE-NEXT:    lbz r3, -64(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f0:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    stwu r1, -80(r1)
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    addi r1, r1, 80
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 64
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f1() #0 "stack-probe-size"="0" nounwind {
+; CHECK-LE-LABEL: f1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    li r0, 259
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:  .LBB1_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdu r12, -16(r1)
+; CHECK-LE-NEXT:    bdnz .LBB1_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    addi r1, r1, 4144
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    li r0, 260
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:  .LBB1_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdu r12, -16(r1)
+; CHECK-BE-NEXT:    bdnz .LBB1_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    addi r1, r1, 4160
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f1:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    li r0, 257
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:  .LBB1_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    bdnz .LBB1_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    addi r1, r1, 4112
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 4096
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f2() #0 nounwind {
+; CHECK-LE-LABEL: f2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    li r0, 16
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:  .LBB2_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    bdnz .LBB2_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    li r0, 16
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:  .LBB2_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    bdnz .LBB2_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f2:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    li r0, 16
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:  .LBB2_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    bdnz .LBB2_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 65536
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
+; CHECK-LE-LABEL: f3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f3:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    stwu r12, -32768(r1)
+; CHECK-32-NEXT:    stwu r12, -32768(r1)
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 65536
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+; Same as f2, but without protection.
+define i8 @f4() nounwind {
+; CHECK-LE-LABEL: f4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lis r0, -2
+; CHECK-LE-NEXT:    ori r0, r0, 65488
+; CHECK-LE-NEXT:    stdux r1, r1, r0
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lis r0, -2
+; CHECK-BE-NEXT:    ori r0, r0, 65472
+; CHECK-BE-NEXT:    stdux r1, r1, r0
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f4:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    lis r0, -2
+; CHECK-32-NEXT:    ori r0, r0, 65520
+; CHECK-32-NEXT:    stwux r1, r1, r0
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 65536
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
+; CHECK-LE-LABEL: f5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    li r0, 16
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    lis r0, -1
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:  .LBB5_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    bdnz .LBB5_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    li r0, 16
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    lis r0, -1
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:  .LBB5_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    bdnz .LBB5_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f5:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    li r0, 16
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    lis r0, -1
+; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:  .LBB5_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    bdnz .LBB5_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 1048576
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f6() #0 nounwind {
+; CHECK-LE-LABEL: f6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    lis r0, 4
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:  .LBB6_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    bdnz .LBB6_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    lis r0, 4
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:  .LBB6_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    bdnz .LBB6_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f6:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    lis r0, 4
+; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:  .LBB6_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    bdnz .LBB6_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 1073741824
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
+; CHECK-LE-LABEL: f7:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lis r0, -1
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    ori r0, r0, 13776
+; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    li r0, 15258
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    lis r0, -1
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:  .LBB7_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    bdnz .LBB7_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 41(r1)
+; CHECK-LE-NEXT:    lbz r3, 41(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f7:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lis r0, -1
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    ori r0, r0, 13760
+; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    li r0, 15258
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    lis r0, -1
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:  .LBB7_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    bdnz .LBB7_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 57(r1)
+; CHECK-BE-NEXT:    lbz r3, 57(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f7:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    lis r0, -1
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    ori r0, r0, 13808
+; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    li r0, 15258
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    lis r0, -1
+; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:  .LBB7_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    bdnz .LBB7_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 9(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 9(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 1000000007
+  %b = getelementptr inbounds i8, i8* %a, i64 101
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }


        


More information about the llvm-commits mailing list