[llvm] r333990 - [X86][CET] Shadow stack fix for setjmp/longjmp

Alexander Ivchenko via llvm-commits llvm-commits@lists.llvm.org
Tue Jun 5 02:22:30 PDT 2018


Author: aivchenk
Date: Tue Jun  5 02:22:30 2018
New Revision: 333990

URL: http://llvm.org/viewvc/llvm-project?rev=333990&view=rev
Log:
[X86][CET] Shadow stack fix for setjmp/longjmp

This is the new version of D46181. It makes setjmp/longjmp work
correctly with the Intel CET shadow stack by saving the shadow
stack pointer (SSP) in the jump buffer on setjmp and fixing the
shadow stack from that value on longjmp. Compared to D46181, the
patch now keys off the cf-protection-return module flag instead
of HasSHSTK, the bug that caused D46181 to be reverted has been
fixed, and the test has been expanded to cover that fix.

patch by mike.dvoretsky

Differential Revision: https://reviews.llvm.org/D47311
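
The new code paths in both the setjmp and the longjmp lowering are gated
purely on the presence of the "cf-protection-return" module flag. As a
minimal sketch (condensed from the test added below; the note that clang
emits this flag under -fcf-protection is an assumption about the front end,
not part of this patch), a module only needs the flag and the sjlj
intrinsics to exercise the new code:

    declare i32 @llvm.eh.sjlj.setjmp(i8*)
    declare void @llvm.eh.sjlj.longjmp(i8*)

    !llvm.module.flags = !{!0}
    !0 = !{i32 4, !"cf-protection-return", i32 1}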


Added:
    llvm/trunk/test/CodeGen/X86/shadow-stack.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=333990&r1=333989&r2=333990&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jun  5 02:22:30 2018
@@ -27619,6 +27619,60 @@ X86TargetLowering::EmitLoweredRetpoline(
   return BB;
 }
 
+/// SetJmp implies future control flow change upon calling the corresponding
+/// LongJmp.
+/// Instead of using the 'return' instruction, the long jump fixes the stack and
+/// performs an indirect branch. To do so it uses the registers that were stored
+/// in the jump buffer (when calling SetJmp).
+/// If the shadow stack is enabled, we need to fix it as well, because some
+/// return addresses will be skipped.
+/// The function will save the SSP for future fixing in the function
+/// emitLongJmpShadowStackFix.
+/// \sa emitLongJmpShadowStackFix
+/// \param [in] MI The temporary Machine Instruction for the builtin.
+/// \param [in] MBB The Machine Basic Block that will be modified.
+void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
+                                                 MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstrBuilder MIB;
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+
+  // Initialize a register with zero.
+  MVT PVT = getPointerTy(MF->getDataLayout());
+  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+  unsigned ZReg = MRI.createVirtualRegister(PtrRC);
+  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
+  BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
+      .addDef(ZReg)
+      .addReg(ZReg, RegState::Undef)
+      .addReg(ZReg, RegState::Undef);
+
+  // Read the current SSP Register value to the zeroed register.
+  unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
+  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
+  BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+
+  // Write the SSP register value to offset 3 in input memory buffer.
+  unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
+  const int64_t SSPOffset = 3 * PVT.getStoreSize();
+  const unsigned MemOpndSlot = 1;
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
+    else
+      MIB.add(MI.getOperand(MemOpndSlot + i));
+  }
+  MIB.addReg(SSPCopyReg);
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+}
+
 MachineBasicBlock *
 X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
@@ -27728,6 +27782,11 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
   else
     MIB.addMBB(restoreMBB);
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
+    emitSetJmpShadowStackFix(MI, thisMBB);
+  }
+
   // Setup
   MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
           .addMBB(restoreMBB);
@@ -27769,6 +27828,183 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
   return sinkMBB;
 }
 
+/// Fix the shadow stack using the previously saved SSP pointer.
+/// \sa emitSetJmpShadowStackFix
+/// \param [in] MI The temporary Machine Instruction for the builtin.
+/// \param [in] MBB The Machine Basic Block that will be modified.
+/// \return The sink MBB that will perform the future indirect branch.
+MachineBasicBlock *
+X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
+                                             MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+
+  MVT PVT = getPointerTy(MF->getDataLayout());
+  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+
+  // checkSspMBB:
+  //         xor vreg1, vreg1
+  //         rdssp vreg1
+  //         test vreg1, vreg1
+  //         je sinkMBB   # Jump if Shadow Stack is not supported
+  // fallMBB:
+  //         mov buf+24/12(%rip), vreg2
+  //         sub vreg1, vreg2
+  //         jbe sinkMBB  # No need to fix the Shadow Stack
+  // fixShadowMBB:
+  //         shr 3/2, vreg2
+  //         incssp vreg2  # fix the SSP according to the lower 8 bits
+  //         shr 8, vreg2
+  //         je sinkMBB
+  // fixShadowLoopPrepareMBB:
+  //         shl vreg2
+  //         mov 128, vreg3
+  // fixShadowLoopMBB:
+  //         incssp vreg3
+  //         dec vreg2
+  //         jne fixShadowLoopMBB # Iterate until you finish fixing
+  //                              # the Shadow Stack
+  // sinkMBB:
+
+  MachineFunction::iterator I = ++MBB->getIterator();
+  const BasicBlock *BB = MBB->getBasicBlock();
+
+  MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, checkSspMBB);
+  MF->insert(I, fallMBB);
+  MF->insert(I, fixShadowMBB);
+  MF->insert(I, fixShadowLoopPrepareMBB);
+  MF->insert(I, fixShadowLoopMBB);
+  MF->insert(I, sinkMBB);
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI),
+                  MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  MBB->addSuccessor(checkSspMBB);
+
+  // Initialize a register with zero.
+  unsigned ZReg = MRI.createVirtualRegister(PtrRC);
+  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
+  BuildMI(checkSspMBB, DL, TII->get(XorRROpc))
+      .addDef(ZReg)
+      .addReg(ZReg, RegState::Undef)
+      .addReg(ZReg, RegState::Undef);
+
+  // Read the current SSP Register value to the zeroed register.
+  unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
+  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
+  BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+
+  // Check whether the result of the SSP register is zero and jump directly
+  // to the sink.
+  unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
+  BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
+      .addReg(SSPCopyReg)
+      .addReg(SSPCopyReg);
+  BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+  checkSspMBB->addSuccessor(sinkMBB);
+  checkSspMBB->addSuccessor(fallMBB);
+
+  // Reload the previously saved SSP register value.
+  unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC);
+  unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
+  const int64_t SPPOffset = 3 * PVT.getStoreSize();
+  MachineInstrBuilder MIB =
+      BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI.getOperand(i), SPPOffset);
+    else
+      MIB.add(MI.getOperand(i));
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  // Subtract the current SSP from the previous SSP.
+  unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
+  unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
+  BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
+      .addReg(PrevSSPReg)
+      .addReg(SSPCopyReg);
+
+  // Jump to sink in case PrevSSPReg <= SSPCopyReg.
+  BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB);
+  fallMBB->addSuccessor(sinkMBB);
+  fallMBB->addSuccessor(fixShadowMBB);
+
+  // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8.
+  unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
+  unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
+  unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
+      .addReg(SspSubReg)
+      .addImm(Offset);
+
+  // Increase the SSP, looking only at the lower 8 bits of the delta.
+  unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
+  BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
+
+  // Reset the lower 8 bits.
+  unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
+      .addReg(SspFirstShrReg)
+      .addImm(8);
+
+  // Jump if the result of the shift is zero.
+  BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+  fixShadowMBB->addSuccessor(sinkMBB);
+  fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
+
+  // Do a single shift left.
+  unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
+  unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
+      .addReg(SspSecondShrReg);
+
+  // Save the value 128 to a register (will be used next with incssp).
+  unsigned Value128InReg = MRI.createVirtualRegister(PtrRC);
+  unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
+  BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
+      .addImm(128);
+  fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
+
+  // Since incssp only looks at the lower 8 bits, we might need to do several
+  // iterations of incssp until we finish fixing the shadow stack.
+  unsigned DecReg = MRI.createVirtualRegister(PtrRC);
+  unsigned CounterReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
+      .addReg(SspAfterShlReg)
+      .addMBB(fixShadowLoopPrepareMBB)
+      .addReg(DecReg)
+      .addMBB(fixShadowLoopMBB);
+
+  // Every iteration we increase the SSP by 128.
+  BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);
+
+  // Every iteration we decrement the counter by 1.
+  unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
+  BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
+
+  // Jump if the counter is not zero yet.
+  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB);
+  fixShadowLoopMBB->addSuccessor(sinkMBB);
+  fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
+
+  return sinkMBB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const {
@@ -27801,13 +28037,21 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
   unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
   unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
 
+  MachineBasicBlock *thisMBB = MBB;
+
+  // When CET and the shadow stack are enabled, we need to fix the Shadow Stack.
+  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
+    thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
+  }
+
   // Reload FP
-  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
   for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
     MIB.add(MI.getOperand(i));
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
   // Reload IP
-  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
   for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
     if (i == X86::AddrDisp)
       MIB.addDisp(MI.getOperand(i), LabelOffset);
@@ -27815,8 +28059,9 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
       MIB.add(MI.getOperand(i));
   }
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
   // Reload SP
-  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
   for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
     if (i == X86::AddrDisp)
       MIB.addDisp(MI.getOperand(i), SPOffset);
@@ -27824,11 +28069,12 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
       MIB.add(MI.getOperand(i));
   }
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
   // Jump
-  BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+  BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
 
   MI.eraseFromParent();
-  return MBB;
+  return thisMBB;
 }
 
 void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,

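A sketch of the jump buffer layout that the lowering above assumes (slot
indices in pointer-sized words, so byte offsets 0/8/16/24 on x86-64 and
0/4/8/12 on i386; slot 3 is the new one added by this patch):

    buf[0]  frame pointer        (stored through llvm.frameaddress in the IR)
    buf[1]  resume address       (written by the setjmp lowering, reloaded on longjmp)
    buf[2]  stack pointer        (stored through llvm.stacksave in the IR)
    buf[3]  shadow stack pointer (SSPOffset = 3 * pointer size; written by
                                  emitSetJmpShadowStackFix, read back by
                                  emitLongJmpShadowStackFix)
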
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=333990&r1=333989&r2=333990&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jun  5 02:22:30 2018
@@ -1331,9 +1331,15 @@ namespace llvm {
     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;
 
+    void emitSetJmpShadowStackFix(MachineInstr &MI,
+                                  MachineBasicBlock *MBB) const;
+
     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const;
 
+    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
+                                                 MachineBasicBlock *MBB) const;
+
     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;
 

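To make the shift arithmetic in emitLongJmpShadowStackFix concrete, here is a
worked example assuming an x86-64 target (8-byte shadow stack entries) and an
illustrative delta of 0x2000 bytes between the saved and the current SSP:

    delta        = PrevSSP - CurSSP = 0x2000 bytes
    shr $3       -> 0x400           (1024 shadow stack entries to pop)
    incsspq      -> pops 0x400 & 0xff = 0 entries (incssp only honors the
                                     low 8 bits of its operand)
    shr $8       -> 0x4             (remaining entries in units of 256)
    shl (by 1)   -> 0x8             (the same amount in units of 128)
    loop 8 times -> incsspq with a register holding 128, popping the
                    remaining 1024 entries

The single shift left converts the 256-entry units produced by the second
shift into the 128-entry steps that each loop iteration's incssp performs.
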
Added: llvm/trunk/test/CodeGen/X86/shadow-stack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shadow-stack.ll?rev=333990&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shadow-stack.ll (added)
+++ llvm/trunk/test/CodeGen/X86/shadow-stack.ll Tue Jun  5 02:22:30 2018
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple x86_64-apple-macosx10.13.0 < %s | FileCheck %s --check-prefix=X86_64
+; RUN: llc -mtriple i386-apple-macosx10.13.0 < %s | FileCheck %s --check-prefix=X86
+
+; The MacOS triples are used to get trapping behavior on the "unreachable" IR
+; instruction, so that the placement of the ud2 instruction can be verified.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; The IR was created using the following C code:
+;; typedef void *jmp_buf;
+;; jmp_buf buf;
+;;
+;; __attribute__((noinline)) int bar(int i) {
+;;   int j = i - 111;
+;;   __builtin_longjmp(&buf, 1);
+;;   return j;
+;; }
+;;
+;; int foo(int i) {
+;;   int j = i * 11;
+;;   if (!__builtin_setjmp(&buf)) {
+;;     j += 33 + bar(j);
+;;   }
+;;   return j + i;
+;; }
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+@buf = common local_unnamed_addr global i8* null, align 8
+
+; Functions that use LongJmp should fix the Shadow Stack using the previously
+; saved ShadowStackPointer in the input buffer.
+; The fix requires unwinding the shadow stack to the last SSP.
+define i32 @bar(i32 %i) local_unnamed_addr {
+; X86_64-LABEL: bar:
+; X86_64:       ## %bb.0: ## %entry
+; X86_64-NEXT:    pushq %rbp
+; X86_64-NEXT:    .cfi_def_cfa_offset 16
+; X86_64-NEXT:    .cfi_offset %rbp, -16
+; X86_64-NEXT:    movq _buf@{{.*}}(%rip), %rax
+; X86_64-NEXT:    movq (%rax), %rax
+; X86_64-NEXT:    xorq %rdx, %rdx
+; X86_64-NEXT:    rdsspq %rdx
+; X86_64-NEXT:    testq %rdx, %rdx
+; X86_64-NEXT:    je LBB0_5
+; X86_64-NEXT:  ## %bb.1: ## %entry
+; X86_64-NEXT:    movq 24(%rax), %rcx
+; X86_64-NEXT:    subq %rdx, %rcx
+; X86_64-NEXT:    jbe LBB0_5
+; X86_64-NEXT:  ## %bb.2: ## %entry
+; X86_64-NEXT:    shrq $3, %rcx
+; X86_64-NEXT:    incsspq %rcx
+; X86_64-NEXT:    shrq $8, %rcx
+; X86_64-NEXT:    je LBB0_5
+; X86_64-NEXT:  ## %bb.3: ## %entry
+; X86_64-NEXT:    shlq %rcx
+; X86_64-NEXT:    movq $128, %rdx
+; X86_64-NEXT:  LBB0_4: ## %entry
+; X86_64-NEXT:    ## =>This Inner Loop Header: Depth=1
+; X86_64-NEXT:    incsspq %rdx
+; X86_64-NEXT:    decq %rcx
+; X86_64-NEXT:    jne LBB0_4
+; X86_64-NEXT:  LBB0_5: ## %entry
+; X86_64-NEXT:    movq (%rax), %rbp
+; X86_64-NEXT:    movq 8(%rax), %rcx
+; X86_64-NEXT:    movq 16(%rax), %rsp
+; X86_64-NEXT:    jmpq *%rcx
+; X86_64-NEXT:    ud2
+;
+; X86-LABEL: bar:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl L_buf$non_lazy_ptr, %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    rdsspd %edx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je LBB0_5
+; X86-NEXT:  ## %bb.1: ## %entry
+; X86-NEXT:    movl 12(%eax), %ecx
+; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    jbe LBB0_5
+; X86-NEXT:  ## %bb.2: ## %entry
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    incsspd %ecx
+; X86-NEXT:    shrl $8, %ecx
+; X86-NEXT:    je LBB0_5
+; X86-NEXT:  ## %bb.3: ## %entry
+; X86-NEXT:    shll %ecx
+; X86-NEXT:    movl $128, %edx
+; X86-NEXT:  LBB0_4: ## %entry
+; X86-NEXT:    ## =>This Inner Loop Header: Depth=1
+; X86-NEXT:    incsspd %edx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    jne LBB0_4
+; X86-NEXT:  LBB0_5: ## %entry
+; X86-NEXT:    movl (%eax), %ebp
+; X86-NEXT:    movl 4(%eax), %ecx
+; X86-NEXT:    movl 8(%eax), %esp
+; X86-NEXT:    jmpl *%ecx
+; X86-NEXT:    ud2
+entry:
+  %0 = load i8*, i8** @buf, align 8
+  tail call void @llvm.eh.sjlj.longjmp(i8* %0)
+  unreachable
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*)
+
+; Functions that call SetJmp should save the current ShadowStackPointer for
+; future fixing of the Shadow Stack.
+define i32 @foo(i32 %i) local_unnamed_addr {
+; X86_64-LABEL: foo:
+; X86_64:       ## %bb.0: ## %entry
+; X86_64-NEXT:    pushq %rbp
+; X86_64-NEXT:    .cfi_def_cfa_offset 16
+; X86_64-NEXT:    .cfi_offset %rbp, -16
+; X86_64-NEXT:    movq %rsp, %rbp
+; X86_64-NEXT:    .cfi_def_cfa_register %rbp
+; X86_64-NEXT:    pushq %r15
+; X86_64-NEXT:    pushq %r14
+; X86_64-NEXT:    pushq %r13
+; X86_64-NEXT:    pushq %r12
+; X86_64-NEXT:    pushq %rbx
+; X86_64-NEXT:    pushq %rax
+; X86_64-NEXT:    .cfi_offset %rbx, -56
+; X86_64-NEXT:    .cfi_offset %r12, -48
+; X86_64-NEXT:    .cfi_offset %r13, -40
+; X86_64-NEXT:    .cfi_offset %r14, -32
+; X86_64-NEXT:    .cfi_offset %r15, -24
+; X86_64-NEXT:    ## kill: def $edi killed $edi def $rdi
+; X86_64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X86_64-NEXT:    movq _buf@{{.*}}(%rip), %rax
+; X86_64-NEXT:    movq (%rax), %rax
+; X86_64-NEXT:    movq %rbp, (%rax)
+; X86_64-NEXT:    movq %rsp, 16(%rax)
+; X86_64-NEXT:    leaq {{.*}}(%rip), %rcx
+; X86_64-NEXT:    movq %rcx, 8(%rax)
+; X86_64-NEXT:    xorq %rcx, %rcx
+; X86_64-NEXT:    rdsspq %rcx
+; X86_64-NEXT:    movq %rcx, 24(%rax)
+; X86_64-NEXT:    #EH_SjLj_Setup LBB1_4
+; X86_64-NEXT:  ## %bb.1: ## %entry
+; X86_64-NEXT:    xorl %eax, %eax
+; X86_64-NEXT:    testl %eax, %eax
+; X86_64-NEXT:    jne LBB1_3
+; X86_64-NEXT:    jmp LBB1_5
+; X86_64-NEXT:  LBB1_4: ## Block address taken
+; X86_64-NEXT:    ## %entry
+; X86_64-NEXT:    movl $1, %eax
+; X86_64-NEXT:    testl %eax, %eax
+; X86_64-NEXT:    je LBB1_5
+; X86_64-NEXT:  LBB1_3: ## %if.end
+; X86_64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; X86_64-NEXT:    shll $2, %eax
+; X86_64-NEXT:    leal (%rax,%rax,2), %eax
+; X86_64-NEXT:    addq $8, %rsp
+; X86_64-NEXT:    popq %rbx
+; X86_64-NEXT:    popq %r12
+; X86_64-NEXT:    popq %r13
+; X86_64-NEXT:    popq %r14
+; X86_64-NEXT:    popq %r15
+; X86_64-NEXT:    popq %rbp
+; X86_64-NEXT:    retq
+; X86_64-NEXT:  LBB1_5: ## %if.then
+; X86_64-NEXT:    callq _bar
+; X86_64-NEXT:    ud2
+;
+; X86-LABEL: foo:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl L_buf$non_lazy_ptr, %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movl %ebp, (%eax)
+; X86-NEXT:    movl %esp, 16(%eax)
+; X86-NEXT:    movl $LBB1_4, 4(%eax)
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    rdsspd %ecx
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    #EH_SjLj_Setup LBB1_4
+; X86-NEXT:  ## %bb.1: ## %entry
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne LBB1_3
+; X86-NEXT:    jmp LBB1_5
+; X86-NEXT:  LBB1_4: ## Block address taken
+; X86-NEXT:    ## %entry
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je LBB1_5
+; X86-NEXT:  LBB1_3: ## %if.end
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+; X86-NEXT:  LBB1_5: ## %if.then
+; X86-NEXT:    calll _bar
+; X86-NEXT:    ud2
+entry:
+  %0 = load i8*, i8** @buf, align 8
+  %1 = bitcast i8* %0 to i8**
+  %2 = tail call i8* @llvm.frameaddress(i32 0)
+  store i8* %2, i8** %1, align 8
+  %3 = tail call i8* @llvm.stacksave()
+  %4 = getelementptr inbounds i8, i8* %0, i64 16
+  %5 = bitcast i8* %4 to i8**
+  store i8* %3, i8** %5, align 8
+  %6 = tail call i32 @llvm.eh.sjlj.setjmp(i8* %0)
+  %tobool = icmp eq i32 %6, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = tail call i32 @bar(i32 undef)
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %add2 = mul nsw i32 %i, 12
+  ret i32 %add2
+}
+
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.stacksave()
+declare i32 @llvm.eh.sjlj.setjmp(i8*)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 4, !"cf-protection-return", i32 1}



