[llvm] d656ae2 - Enhance stack protector

Xiang1 Zhang via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 11 16:43:59 PST 2022

Author: Xiang1 Zhang
Date: 2022-12-12T08:39:50+08:00
New Revision: d656ae28095726830f9beb8dbd4d69f5144ef821

URL: https://github.com/llvm/llvm-project/commit/d656ae28095726830f9beb8dbd4d69f5144ef821
DIFF: https://github.com/llvm/llvm-project/commit/d656ae28095726830f9beb8dbd4d69f5144ef821.diff

LOG: Enhance stack protector

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D139254




diff  --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index f974bb67e440a..9a1063ed7f33b 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -415,11 +415,11 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
 /// Returns true if the platform/triple supports the stackprotectorcreate pseudo
 /// node.
-static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
+static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc,
                            const TargetLoweringBase *TLI, AllocaInst *&AI) {
   bool SupportsSelectionDAGSP = false;
   IRBuilder<> B(&F->getEntryBlock().front());
-  PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+  PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext());
   AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
   Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
@@ -442,16 +442,34 @@ bool StackProtector::InsertStackProtectors() {
       TLI->useStackGuardXorFP() ||
       (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
   AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+  bool RecalculateDT = false;
+  BasicBlock *FailBB = nullptr;
   for (BasicBlock &BB : llvm::make_early_inc_range(*F)) {
-    ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
-    if (!RI)
+    // This is the stack protector's auto-generated check BB; skip it.
+    if (&BB == FailBB)
+      continue;
+    Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!CheckLoc) {
+      for (auto &Inst : BB) {
+        auto *CB = dyn_cast<CallBase>(&Inst);
+        if (!CB)
+          continue;
+        if (!CB->doesNotReturn())
+          continue;
+        // Do the stack check before no-return calls (e.g. __cxa_throw).
+        CheckLoc = CB;
+        break;
+      }
+    }
+    if (!CheckLoc)
     // Generate prologue instrumentation if not already generated.
     if (!HasPrologue) {
       HasPrologue = true;
-      SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
+      SupportsSelectionDAGSP &= CreatePrologue(F, M, CheckLoc, TLI, AI);
     // SelectionDAG based code generation. Nothing else needs to be done here.
@@ -477,8 +495,7 @@ bool StackProtector::InsertStackProtectors() {
     // verifier guarantees that a tail call is either directly before the
     // return or with a single correct bitcast of the return value in between so
     // we don't need to worry about many situations here.
-    Instruction *CheckLoc = RI;
-    Instruction *Prev = RI->getPrevNonDebugInstruction();
+    Instruction *Prev = CheckLoc->getPrevNonDebugInstruction();
     if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall())
       CheckLoc = Prev;
     else if (Prev) {
@@ -528,18 +545,13 @@ bool StackProtector::InsertStackProtectors() {
       // Create the FailBB. We duplicate the BB every time since the MI tail
       // merge pass will merge together all of the various BB into one including
       // fail BB generated by the stack protector pseudo instruction.
-      BasicBlock *FailBB = CreateFailBB();
+      if (!FailBB)
+        FailBB = CreateFailBB();
       // Split the basic block before the return instruction.
       BasicBlock *NewBB =
           BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");
-      // Update the dominator tree if we need to.
-      if (DT && DT->isReachableFromEntry(&BB)) {
-        DT->addNewBlock(NewBB, &BB);
-        DT->addNewBlock(FailBB, &BB);
-      }
       // Remove default branch instruction to the new BB.
@@ -560,9 +572,38 @@ bool StackProtector::InsertStackProtectors() {
       B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
+      // Update the dominator tree if we need to.
+      if (DT && DT->isReachableFromEntry(&BB))
+        RecalculateDT = true;
+  // TODO: Refine me, use a faster way to update DT.
+  // Now we have split the BB, something like:
+  // ===================================
+  // BB:
+  //   RetOrNoReturnCall
+  // ==>
+  // BB:
+  //  CondBr
+  // NewBB:
+  //   RetOrNoReturnCall
+  // FailBB: (*)
+  //   HandleStackCheckFail
+  // ===================================
+  // The faster way should cover:
+  // For NewBB, it should inherit the old BB's dominatees.
+  // 1) return: it has no dominatees.
+  // 2) no-return call: it may have dominatees.
+  //
+  // For FailBB, it may have been created before, so:
+  // 1) if it has 1 predecessor, add it into DT.
+  // 2) if it has 2 predecessors, it should have no dominator; remove it from DT.
+  // 3) if it has 3 or more predecessors, DT has removed it; do nothing.
+  if (RecalculateDT)
+    DT->recalculate(*F);
   // Return if we didn't modify any basic blocks. i.e., there are no return
   // statements in the function.
   return HasPrologue;

diff  --git a/llvm/test/CodeGen/X86/stack-protector-2.ll b/llvm/test/CodeGen/X86/stack-protector-2.ll
index c6971a59f813f..f2fc64ab0c866 100644
--- a/llvm/test/CodeGen/X86/stack-protector-2.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-2.ll
@@ -192,4 +192,34 @@ define dso_local void @bar_nossp(i64 %0) {
   ret void
+; Check stack protector for noreturn call
+define dso_local i32 @foo_no_return(i32 %0) #1 {
+; CHECK-LABEL: @foo_no_return
+  %cmp = icmp sgt i32 %0, 4
+  br i1 %cmp, label %if.then, label %if.end
+; CHECK:      if.then:                                          ; preds = %entry
+; CHECK-NEXT:   %StackGuard1 = load volatile i8*, i8* addrspace(257)* inttoptr (i32 40 to i8* addrspace(257)*), align 8
+; CHECK-NEXT:   %1 = load volatile i8*, i8** %StackGuardSlot, align 8
+; CHECK-NEXT:   %2 = icmp eq i8* %StackGuard1, %1
+; CHECK-NEXT:   br i1 %2, label %SP_return, label %CallStackCheckFailBlk
+; CHECK:      SP_return:                                        ; preds = %if.then
+; CHECK-NEXT:   %call = call i32 @foo_no_return(i32 1)
+; CHECK-NEXT:   br label %return
+; CHECK:      if.end:                                           ; preds = %entry
+; CHECK-NEXT:   br label %return
+if.then:                                          ; preds = %entry
+  %call = call i32 @foo_no_return(i32 1)
+  br label %return
+if.end:                                           ; preds = %entry
+  br label %return
+return:                                           ; preds = %if.end, %if.then
+  ret i32 0
 attributes #0 = { sspstrong }
+attributes #1 = { noreturn sspreq}

diff  --git a/llvm/test/CodeGen/X86/stack-protector-no-return.ll b/llvm/test/CodeGen/X86/stack-protector-no-return.ll
index b8ea5bd07ee5e..8f391c17285a3 100644
--- a/llvm/test/CodeGen/X86/stack-protector-no-return.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-no-return.ll
@@ -1,146 +1,63 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -o - -verify-dom-info | FileCheck %s
-$__clang_call_terminate = comdat any
-@_ZTIi = external dso_local constant i8*
-@.str = private unnamed_addr constant [5 x i8] c"win\0A\00", align 1
-; Function Attrs: mustprogress noreturn sspreq uwtable
-define dso_local void @_Z7catchesv() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-  %exception = tail call i8* @__cxa_allocate_exception(i64 4) #8
-  %0 = bitcast i8* %exception to i32*
-  store i32 1, i32* %0, align 16
-  invoke void @__cxa_throw(i8* nonnull %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #9
-          to label %unreachable unwind label %lpad
-lpad:                                             ; preds = %entry
-  %1 = landingpad { i8*, i32 }
-          catch i8* null
-  %2 = extractvalue { i8*, i32 } %1, 0
-  %3 = tail call i8* @__cxa_begin_catch(i8* %2) #8
-  %call = invoke i64 @write(i32 noundef 1, i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 noundef 4)
-          to label %invoke.cont unwind label %lpad1
-invoke.cont:                                      ; preds = %lpad
-  invoke void @_exit(i32 noundef 1) #9
-          to label %invoke.cont2 unwind label %lpad1
-invoke.cont2:                                     ; preds = %invoke.cont
-  unreachable
-lpad1:                                            ; preds = %invoke.cont, %lpad
-  %4 = landingpad { i8*, i32 }
-          cleanup
-  invoke void @__cxa_end_catch()
-          to label %eh.resume unwind label %terminate.lpad
-eh.resume:                                        ; preds = %lpad1
-  resume { i8*, i32 } %4
-terminate.lpad:                                   ; preds = %lpad1
-  %5 = landingpad { i8*, i32 }
-          catch i8* null
-  %6 = extractvalue { i8*, i32 } %5, 0
-  tail call void @__clang_call_terminate(i8* %6) #10
-  unreachable
-unreachable:                                      ; preds = %entry
-  unreachable
-; Function Attrs: nofree
-declare dso_local noalias i8* @__cxa_allocate_exception(i64) local_unnamed_addr #1
-; Function Attrs: nofree noreturn
-declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr #2
-declare dso_local i32 @__gxx_personality_v0(...)
-; Function Attrs: nofree
-declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr #1
-; Function Attrs: nofree
-declare dso_local noundef i64 @write(i32 noundef, i8* nocapture noundef readonly, i64 noundef) local_unnamed_addr #3
-; Function Attrs: nofree noreturn
-declare dso_local void @_exit(i32 noundef) local_unnamed_addr #4
-; Function Attrs: nofree
-declare dso_local void @__cxa_end_catch() local_unnamed_addr #1
-; Function Attrs: noinline noreturn nounwind
-define linkonce_odr hidden void @__clang_call_terminate(i8* %0) local_unnamed_addr #5 comdat {
-; CHECK-LABEL: __clang_call_terminate:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    callq __cxa_begin_catch
-; CHECK-NEXT:    callq _ZSt9terminatev
-  %2 = tail call i8* @__cxa_begin_catch(i8* %0) #8
-  tail call void @_ZSt9terminatev() #10
-  unreachable
-; Function Attrs: nofree noreturn nounwind
-declare dso_local void @_ZSt9terminatev() local_unnamed_addr #6
-; Function Attrs: mustprogress nofree sspreq uwtable
-define dso_local void @_Z4vulni(i32 noundef %op) local_unnamed_addr #7 {
-; CHECK-LABEL: _Z4vulni:
+; Function Attrs: sspreq
+define void @_Z7catchesv() #0 personality i8* null {
+; CHECK-LABEL: _Z7catchesv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movq %fs:40, %rax
 ; CHECK-NEXT:    movq %rax, (%rsp)
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    je .LBB2_3
-; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:  # %bb.1: # %invoke.cont
+; CHECK-NEXT:    movq %fs:40, %rax
+; CHECK-NEXT:    cmpq (%rsp), %rax
+; CHECK-NEXT:    jne .LBB0_6
+; CHECK-NEXT:  # %bb.2: # %SP_return
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:  # %bb.3: # %invoke.cont2
+; CHECK-NEXT:  .LBB0_4: # %lpad1
+; CHECK-NEXT:  .Ltmp4:
 ; CHECK-NEXT:    movq %fs:40, %rax
 ; CHECK-NEXT:    cmpq (%rsp), %rax
-; CHECK-NEXT:    jne .LBB2_2
-; CHECK-NEXT:  # %bb.4: # %SP_return
+; CHECK-NEXT:    jne .LBB0_6
+; CHECK-NEXT:  # %bb.5: # %SP_return2
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB2_3: # %if.then
+; CHECK-NEXT:  .LBB0_6: # %CallStackCheckFailBlk
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movl $4, %edi
-; CHECK-NEXT:    callq __cxa_allocate_exception
-; CHECK-NEXT:    movl $1, (%rax)
-; CHECK-NEXT:    movl $_ZTIi, %esi
-; CHECK-NEXT:    movq %rax, %rdi
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    callq __cxa_throw
-; CHECK-NEXT:  .LBB2_2: # %CallStackCheckFailBlk
 ; CHECK-NEXT:    callq __stack_chk_fail@PLT
-  %cmp = icmp eq i32 %op, 1
-  br i1 %cmp, label %if.then, label %if.end
+  %call = invoke i64 null(i32 0, i8* null, i64 0)
+          to label %invoke.cont unwind label %lpad1
-if.then:                                          ; preds = %entry
-  %exception = tail call i8* @__cxa_allocate_exception(i64 4) #8
-  %0 = bitcast i8* %exception to i32*
-  store i32 1, i32* %0, align 16
-  tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #9
+invoke.cont:                                      ; preds = %entry
+  invoke void null(i32 0) #1
+          to label %invoke.cont2 unwind label %lpad1
+invoke.cont2:                                     ; preds = %invoke.cont
-if.end:                                           ; preds = %entry
+lpad1:                                            ; preds = %invoke.cont, %entry
+  %0 = landingpad { i8*, i32 }
+          cleanup
   ret void
-attributes #0 = { mustprogress noreturn sspreq uwtable }
-attributes #1 = { nofree }
-attributes #2 = { nofree noreturn }
-attributes #3 = { nofree }
-attributes #4 = { nofree noreturn }
-attributes #5 = { noinline noreturn nounwind }
-attributes #6 = { nofree noreturn nounwind }
-attributes #7 = { mustprogress nofree sspreq uwtable }
-attributes #8 = { nounwind }
-attributes #9 = { noreturn }
-attributes #10 = { noreturn nounwind }
+; uselistorder directives
+uselistorder i8* null, { 1, 0 }
+attributes #0 = { sspreq }
+attributes #1 = { noreturn }

diff  --git a/llvm/test/CodeGen/X86/stack-protector-recursively.ll b/llvm/test/CodeGen/X86/stack-protector-recursively.ll
new file mode 100644
index 0000000000000..383af168de775
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-protector-recursively.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -o - < %s | FileCheck %s
+; Make sure the stack protector does not infinitely check __stack_chk_fail.
+define dso_local void @__stack_chk_fail() local_unnamed_addr #0 {
+; CHECK-LABEL: __stack_chk_fail:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movq %fs:40, %rax
+; CHECK-NEXT:    movq %rax, (%rsp)
+; CHECK-NEXT:    movq %fs:40, %rax
+; CHECK-NEXT:    cmpq (%rsp), %rax
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %SP_return
+; CHECK-NEXT:    ud2
+; CHECK-NEXT:  .LBB0_2: # %CallStackCheckFailBlk
+; CHECK-NEXT:    callq __stack_chk_fail
+  tail call void @llvm.trap()
+  unreachable
+declare void @llvm.trap() #1
+attributes #0 = { noreturn nounwind sspreq }
+attributes #1 = { noreturn nounwind }


More information about the llvm-commits mailing list