[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: improve handling of unreachable basic blocks (PR #136183)

Anatoly Trosinenko via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Apr 30 07:54:47 PDT 2025


https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/136183

>From 352364dfc00d23111fc44bc807d3484bc67aff00 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 17 Apr 2025 20:51:16 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: improve handling of unreachable
 basic blocks

Instead of refusing to analyze an instruction completely when it is
unreachable according to the CFG reconstructed by BOLT, pessimistically
assume all registers to be unsafe at the start of basic blocks without
any predecessors. Nevertheless, unreachable basic blocks found in
optimized code likely mean imprecise CFG reconstruction, so report a
warning once per basic block without predecessors.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 46 ++++++++++-----
 .../AArch64/gs-pacret-autiasp.s               |  7 ++-
 .../binary-analysis/AArch64/gs-pauth-calls.s  | 57 +++++++++++++++++++
 3 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 7a86d9c4c9a59..917649731884e 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -344,6 +344,12 @@ class SrcSafetyAnalysis {
     return S;
   }
 
+  /// Creates a state with all registers marked unsafe (not to be confused
+  /// with empty state).
+  SrcState createUnsafeState() const {
+    return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+  }
+
   BitVector getClobberedRegs(const MCInst &Point) const {
     BitVector Clobbered(NumRegs);
     // Assume a call can clobber all registers, including callee-saved
@@ -586,6 +592,13 @@ class DataflowSrcSafetyAnalysis
     if (BB.isEntryPoint())
       return createEntryState();
 
+    // If a basic block without any predecessors is found in an optimized code,
+    // this likely means that some CFG edges were not detected. Pessimistically
+    // assume all registers to be unsafe before this basic block and warn about
+    // this fact in FunctionAnalysis::findUnsafeUses().
+    if (BB.pred_empty())
+      return createUnsafeState();
+
     return SrcState();
   }
 
@@ -659,12 +672,6 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
       BC.MIB->removeAnnotation(I.second, StateAnnotationIndex);
   }
 
-  /// Creates a state with all registers marked unsafe (not to be confused
-  /// with empty state).
-  SrcState createUnsafeState() const {
-    return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
-  }
-
 public:
   CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF,
                               MCPlusBuilder::AllocatorIdTy AllocId,
@@ -1335,19 +1342,30 @@ void FunctionAnalysisContext::findUnsafeUses(
     BF.dump();
   });
 
+  if (BF.hasCFG()) {
+    // Warn on basic blocks being unreachable according to BOLT, as this
+    // likely means CFG is imprecise.
+    for (BinaryBasicBlock &BB : BF) {
+      if (!BB.pred_empty() || BB.isEntryPoint())
+        continue;
+      // Arbitrarily attach the report to the first instruction of BB.
+      MCInst *InstToReport = BB.getFirstNonPseudoInstr();
+      if (!InstToReport)
+        continue; // BB has no real instructions
+
+      Reports.push_back(
+          make_generic_report(MCInstReference::get(InstToReport, BF),
+                              "Warning: no predecessor basic blocks detected "
+                              "(possibly incomplete CFG)"));
+    }
+  }
+
   iterateOverInstrs(BF, [&](MCInstReference Inst) {
     if (BC.MIB->isCFI(Inst))
       return;
 
     const SrcState &S = Analysis->getStateBefore(Inst);
-
-    // If non-empty state was never propagated from the entry basic block
-    // to Inst, assume it to be unreachable and report a warning.
-    if (S.empty()) {
-      Reports.push_back(
-          make_generic_report(Inst, "Warning: unreachable instruction found"));
-      return;
-    }
+    assert(!S.empty() && "Instruction has no associated state");
 
     if (auto Report = shouldReportReturnGadget(BC, Inst, S))
       Reports.push_back(*Report);
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 284f0bea607a5..6559ba336e8de 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -215,12 +215,17 @@ f_callclobbered_calleesaved:
         .globl  f_unreachable_instruction
         .type   f_unreachable_instruction, at function
 f_unreachable_instruction:
-// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected (possibly incomplete CFG) in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
 // CHECK-NEXT:    The instruction is     {{[0-9a-f]+}}:       add     x0, x1, x2
 // CHECK-NOT:   instructions that write to the affected registers after any authentication are:
+// CHECK-LABEL: GS-PAUTH: non-protected ret found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
+// CHECK-NEXT:    The instruction is     {{[0-9a-f]+}}:       ret
+// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         b       1f
         add     x0, x1, x2
 1:
+        // "ret" is reported as unprotected, as LR is pessimistically assumed
+        // unsafe at "add x0, x1, x2", thus it is unsafe at "ret" as well.
         ret
         .size f_unreachable_instruction, .-f_unreachable_instruction
 
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
index c79c5926a05cd..9d01431e809ab 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
@@ -1428,6 +1428,63 @@ printed_instrs_nocfg:
         br      x0
         .size   printed_instrs_nocfg, .-printed_instrs_nocfg
 
+// Test handling of unreachable basic blocks.
+//
+// Basic blocks without any predecessors were observed in real-world optimized
+// code. At least sometimes they were actually reachable via jump table, which
+// was not detected, but the function was processed as if its CFG was
+// reconstructed successfully.
+//
+// As a more predictable model example, let's use really unreachable code
+// for testing.
+
+        .globl  bad_unreachable_call
+        .type   bad_unreachable_call, at function
+bad_unreachable_call:
+// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected (possibly incomplete CFG) in function bad_unreachable_call, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      blr     x0
+// CHECK-NOT:   instructions that write to the affected registers after any authentication are:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_unreachable_call, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      blr     x0
+// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+
+        b       1f
+        // unreachable basic block:
+        blr     x0
+
+1:      // reachable basic block:
+        ldp     x29, x30, [sp], #16
+        autiasp
+        ret
+        .size bad_unreachable_call, .-bad_unreachable_call
+
+        .globl  good_unreachable_call
+        .type   good_unreachable_call, at function
+good_unreachable_call:
+// CHECK-NOT: non-protected call{{.*}}good_unreachable_call
+// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected (possibly incomplete CFG) in function good_unreachable_call, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autia   x0, x1
+// CHECK-NOT: instructions that write to the affected registers after any authentication are:
+// CHECK-NOT: non-protected call{{.*}}good_unreachable_call
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+
+        b       1f
+        // unreachable basic block:
+        autia   x0, x1
+        blr     x0      // <-- this call is definitely protected provided at least
+                        //     basic block boundaries are detected correctly
+
+1:      // reachable basic block:
+        ldp     x29, x30, [sp], #16
+        autiasp
+        ret
+        .size good_unreachable_call, .-good_unreachable_call
+
         .globl  main
         .type   main, at function
 main:

>From e5fdebd456c5f201a90e10833c5d7f12a2f09667 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 30 Apr 2025 11:07:44 +0300
Subject: [PATCH 2/2] Fix handling of unreachable loops of BBs

---
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 49 ++++++++++++++-----
 .../AArch64/gs-pacret-autiasp.s               |  2 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  | 31 +++++++++++-
 3 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 917649731884e..d4282daad648b 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1342,21 +1342,42 @@ void FunctionAnalysisContext::findUnsafeUses(
     BF.dump();
   });
 
+  bool UnreachableBBReported = false;
   if (BF.hasCFG()) {
-    // Warn on basic blocks being unreachable according to BOLT, as this
-    // likely means CFG is imprecise.
+    // Warn on basic blocks being unreachable according to BOLT (at most once
+    // per BinaryFunction), as this likely means the CFG reconstructed by BOLT
+    // is imprecise. A basic block can be
+    // * reachable from an entry basic block - a hopefully correct non-empty
+    //   state is propagated to that basic block sooner or later. All basic
+    //   blocks are expected to belong to this category under normal conditions.
+    // * reachable from a "directly unreachable" BB (a basic block that has no
+    //   direct predecessors and this is not because it is an entry BB) - *some*
+    //   non-empty state is propagated to this basic block sooner or later, as
+    //   the initial state of directly unreachable basic blocks is
+    //   pessimistically initialized to "all registers are unsafe"
+    //   - a warning can be printed for the "directly unreachable" basic block
+    // * neither reachable from an entry nor from a "directly unreachable" BB
+    //   (such as if this BB is in an isolated loop of basic blocks) - the final
+    //   state is computed to be empty for this basic block
+    //   - a warning can be printed for this basic block
     for (BinaryBasicBlock &BB : BF) {
-      if (!BB.pred_empty() || BB.isEntryPoint())
+      MCInst *FirstInst = BB.getFirstNonPseudoInstr();
+      // Skip empty basic block early for simplicity.
+      if (!FirstInst)
+        continue;
+
+      bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint();
+      bool HasNoStateComputed = Analysis->getStateBefore(*FirstInst).empty();
+      if (!IsDirectlyUnreachable && !HasNoStateComputed)
         continue;
-      // Arbitrarily attach the report to the first instruction of BB.
-      MCInst *InstToReport = BB.getFirstNonPseudoInstr();
-      if (!InstToReport)
-        continue; // BB has no real instructions
 
+      // Arbitrarily attach the report to the first instruction of BB.
       Reports.push_back(
-          make_generic_report(MCInstReference::get(InstToReport, BF),
-                              "Warning: no predecessor basic blocks detected "
-                              "(possibly incomplete CFG)"));
+          make_generic_report(MCInstReference::get(FirstInst, BF),
+                              "Warning: the function has unreachable basic "
+                              "blocks (possibly incomplete CFG)"));
+      UnreachableBBReported = true;
+      break; // One warning per function.
     }
   }
 
@@ -1365,7 +1386,13 @@ void FunctionAnalysisContext::findUnsafeUses(
       return;
 
     const SrcState &S = Analysis->getStateBefore(Inst);
-    assert(!S.empty() && "Instruction has no associated state");
+    if (S.empty()) {
+      LLVM_DEBUG(
+          { traceInst(BC, "Instruction has no state, skipping", Inst); });
+      assert(UnreachableBBReported && "Should be reported at least once");
+      (void)UnreachableBBReported;
+      return;
+    }
 
     if (auto Report = shouldReportReturnGadget(BC, Inst, S))
       Reports.push_back(*Report);
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 6559ba336e8de..88c581f14a84d 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -215,7 +215,7 @@ f_callclobbered_calleesaved:
         .globl  f_unreachable_instruction
         .type   f_unreachable_instruction, at function
 f_unreachable_instruction:
-// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected (possibly incomplete CFG) in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
 // CHECK-NEXT:    The instruction is     {{[0-9a-f]+}}:       add     x0, x1, x2
 // CHECK-NOT:   instructions that write to the affected registers after any authentication are:
 // CHECK-LABEL: GS-PAUTH: non-protected ret found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
index 9d01431e809ab..5f49918c39c94 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
@@ -1441,7 +1441,7 @@ printed_instrs_nocfg:
         .globl  bad_unreachable_call
         .type   bad_unreachable_call, at function
 bad_unreachable_call:
-// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected (possibly incomplete CFG) in function bad_unreachable_call, basic block {{[^,]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function bad_unreachable_call, basic block {{[^,]+}}, at address
 // CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      blr     x0
 // CHECK-NOT:   instructions that write to the affected registers after any authentication are:
 // CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_unreachable_call, basic block {{[^,]+}}, at address
@@ -1465,7 +1465,7 @@ bad_unreachable_call:
         .type   good_unreachable_call, at function
 good_unreachable_call:
 // CHECK-NOT: non-protected call{{.*}}good_unreachable_call
-// CHECK-LABEL: GS-PAUTH: Warning: no predecessor basic blocks detected (possibly incomplete CFG) in function good_unreachable_call, basic block {{[^,]+}}, at address
+// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function good_unreachable_call, basic block {{[^,]+}}, at address
 // CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autia   x0, x1
 // CHECK-NOT: instructions that write to the affected registers after any authentication are:
 // CHECK-NOT: non-protected call{{.*}}good_unreachable_call
@@ -1485,6 +1485,33 @@ good_unreachable_call:
         ret
         .size good_unreachable_call, .-good_unreachable_call
 
+        .globl  unreachable_loop_of_bbs
+        .type   unreachable_loop_of_bbs, at function
+unreachable_loop_of_bbs:
+// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs
+// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs
+// CHECK-LABEL: GS-PAUTH: Warning: the function has unreachable basic blocks (possibly incomplete CFG) in function unreachable_loop_of_bbs, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      blr     x0
+// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs
+// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        b       .Lreachable_epilogue_bb
+
+.Lfirst_unreachable_bb:
+        blr     x0      // <-- this call is not analyzed
+        b       .Lsecond_unreachable_bb
+.Lsecond_unreachable_bb:
+        blr     x1      // <-- this call is not analyzed
+        b       .Lfirst_unreachable_bb
+
+.Lreachable_epilogue_bb:
+        ldp     x29, x30, [sp], #16
+        autiasp
+        ret
+        .size unreachable_loop_of_bbs, .-unreachable_loop_of_bbs
+
         .globl  main
         .type   main, at function
 main:



More information about the llvm-branch-commits mailing list