[llvm] [BOLT][AArch64] Remove assertions from jump table heuristic (PR #124372)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 24 16:38:47 PST 2025


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/124372

>From ca6aa8b423af784f1e8c799510f4e5f1fb90940b Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Fri, 24 Jan 2025 14:11:31 -0800
Subject: [PATCH 1/2] [BOLT][AArch64] Remove assertions from jump table
 heuristic

The code for jump table detection on AArch64 asserts liberally whenever
the input instruction sequence does not match the expected pattern. As a
result, BOLT fails to process binaries with such sequences instead of
ignoring functions with unknown control flow.

Remove asserts in analyzeIndirectBranchFragment() and mark indirect
jumps as instructions with unknown control flow instead.
---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 45 ++++++++++---------
 bolt/test/AArch64/jump-table-heuristic-fail.s | 29 ++++++++++++
 bolt/test/AArch64/test-indirect-branch.s      |  9 ++--
 3 files changed, 58 insertions(+), 25 deletions(-)
 create mode 100644 bolt/test/AArch64/jump-table-heuristic-fail.s

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index ac709c5dd063ac..c1055311141322 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -834,6 +834,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   ///                                   #  of this BB)
   ///   br      x0                      # Indirect jump instruction
   ///
+  /// Return true on successful jump table instruction sequence match, false
+  /// otherwise.
   bool analyzeIndirectBranchFragment(
       const MCInst &Inst,
       DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain,
@@ -842,6 +844,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     // Expect AArch64 BR
     assert(Inst.getOpcode() == AArch64::BR && "Unexpected opcode");
 
+    JumpTable = nullptr;
+
     // Match the indirect branch pattern for aarch64
     SmallVector<MCInst *, 4> &UsesRoot = UDChain[&Inst];
     if (UsesRoot.size() == 0 || UsesRoot[0] == nullptr)
@@ -879,8 +883,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       // Parsed as ADDXrs reg:x8 reg:x8 reg:x12 imm:0
       return false;
     }
-    assert(DefAdd->getOpcode() == AArch64::ADDXrx &&
-           "Failed to match indirect branch!");
+    if (DefAdd->getOpcode() != AArch64::ADDXrx)
+      return false;
 
     // Validate ADD operands
     int64_t OperandExtension = DefAdd->getOperand(3).getImm();
@@ -897,8 +901,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       //   ldr     w7, [x6]
       //   add     x6, x6, w7, sxtw => no shift amount
       //   br      x6
-      errs() << "BOLT-WARNING: "
-                "Failed to match indirect branch: ShiftVAL != 2 \n";
+      dbgs() << "BOLT-DEBUG: "
+                "failed to match indirect branch: ShiftVAL != 2\n";
       return false;
     }
 
@@ -909,7 +913,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     else if (ExtendType == AArch64_AM::SXTW)
       ScaleValue = 4LL;
     else
-      llvm_unreachable("Failed to match indirect branch! (fragment 3)");
+      return false;
 
     // Match an ADR to load base address to be used when addressing JT targets
     SmallVector<MCInst *, 4> &UsesAdd = UDChain[DefAdd];
@@ -920,18 +924,15 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       return false;
     }
     MCInst *DefBaseAddr = UsesAdd[1];
-    assert(DefBaseAddr->getOpcode() == AArch64::ADR &&
-           "Failed to match indirect branch pattern! (fragment 3)");
+    if (DefBaseAddr->getOpcode() != AArch64::ADR)
+      return false;
 
     PCRelBase = DefBaseAddr;
     // Match LOAD to load the jump table (relative) target
     const MCInst *DefLoad = UsesAdd[2];
-    assert(mayLoad(*DefLoad) &&
-           "Failed to match indirect branch load pattern! (1)");
-    assert((ScaleValue != 1LL || isLDRB(*DefLoad)) &&
-           "Failed to match indirect branch load pattern! (2)");
-    assert((ScaleValue != 2LL || isLDRH(*DefLoad)) &&
-           "Failed to match indirect branch load pattern! (3)");
+    if (!mayLoad(*DefLoad) || (ScaleValue == 1LL && !isLDRB(*DefLoad)) ||
+        (ScaleValue == 2LL && !isLDRH(*DefLoad)))
+      return false;
 
     // Match ADD that calculates the JumpTable Base Address (not the offset)
     SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad];
@@ -941,7 +942,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
         isRegToRegMove(*DefJTBaseAdd, From, To)) {
       // Sometimes base address may have been defined in another basic block
       // (hoisted). Return with no jump table info.
-      JumpTable = nullptr;
       return true;
     }
 
@@ -953,24 +953,27 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       //  adr     x12, 0x247b30 <__gettextparse+0x5b0>
       //  add     x13, x12, w13, sxth #2
       //  br      x13
-      errs() << "BOLT-WARNING: Failed to match indirect branch: "
-                "nop/adr instead of adrp/add \n";
+      dbgs() << "BOLT-DEBUG: failed to match indirect branch: "
+                "nop/adr instead of adrp/add\n";
       return false;
     }
 
-    assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
-           "Failed to match jump table base address pattern! (1)");
+    if (DefJTBaseAdd->getOpcode() != AArch64::ADDXri) {
+      dbgs() << "BOLT-DEBUG: "
+                "failed to match jump table base address pattern! (1)\n";
+      return false;
+    }
 
     if (DefJTBaseAdd->getOperand(2).isImm())
       Offset = DefJTBaseAdd->getOperand(2).getImm();
     SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd];
     const MCInst *DefJTBasePage = UsesJTBaseAdd[1];
     if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) {
-      JumpTable = nullptr;
       return true;
     }
-    assert(DefJTBasePage->getOpcode() == AArch64::ADRP &&
-           "Failed to match jump table base page pattern! (2)");
+    if (DefJTBasePage->getOpcode() != AArch64::ADRP)
+      return false;
+
     if (DefJTBasePage->getOperand(1).isExpr())
       JumpTable = DefJTBasePage->getOperand(1).getExpr();
     return true;
diff --git a/bolt/test/AArch64/jump-table-heuristic-fail.s b/bolt/test/AArch64/jump-table-heuristic-fail.s
new file mode 100644
index 00000000000000..724171ac399251
--- /dev/null
+++ b/bolt/test/AArch64/jump-table-heuristic-fail.s
@@ -0,0 +1,29 @@
+## Verify that BOLT does not crash while encountering instruction sequence that
+## does not perfectly match jump table pattern.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg 2>&1 | FileCheck %s
+
+  .section .text
+  .align 4
+  .globl _start
+  .type  _start, %function
+_start:
+  sub     w0, w0, #0x4a
+## The address loaded into x22 is undefined. However, the instructions that
+## follow ldr, use the x22 address as a regular jump table.
+  ldr     x22, [x29, #0x98]
+  ldrb    w0, [x22, w0, uxtw]
+  adr     x1, #12
+  add     x0, x1, w0, sxtb #2
+  br      x0
+# CHECK: br x0 # UNKNOWN
+.L0:
+  ret
+.size _start, .-_start
+
+## Force relocation mode.
+  .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/test-indirect-branch.s b/bolt/test/AArch64/test-indirect-branch.s
index 168e50c8f47f52..1e16e76b11530f 100644
--- a/bolt/test/AArch64/test-indirect-branch.s
+++ b/bolt/test/AArch64/test-indirect-branch.s
@@ -3,10 +3,11 @@
 
 // clang-format off
 
-// REQUIRES: system-linux
+// REQUIRES: system-linux, asserts
+
 // RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
 // RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
-// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict\
+// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict --debug-only=mcplus \
 // RUN:  -v=1 2>&1 | FileCheck %s
 
 // Pattern 1: there is no shift amount after the 'add' instruction.
@@ -39,7 +40,7 @@ _start:
 // svc #0
 
 // Pattern 1
-// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
+// CHECK: BOLT-DEBUG: failed to match indirect branch: ShiftVAL != 2
   .globl test1
   .type  test1, %function
 test1:
@@ -57,7 +58,7 @@ test1_2:
    ret
 
 // Pattern 2
-// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
+// CHECK: BOLT-DEBUG: failed to match indirect branch: nop/adr instead of adrp/add
   .globl test2
   .type  test2, %function
 test2:

>From e3d985e32cae88c80ab373b7a99f339ff78cadcc Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Fri, 24 Jan 2025 16:38:36 -0800
Subject: [PATCH 2/2] fixup! [BOLT][AArch64] Remove assertions from jump table
 heuristic

---
 bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index c1055311141322..4b21ff719b3abe 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -901,8 +901,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       //   ldr     w7, [x6]
       //   add     x6, x6, w7, sxtw => no shift amount
       //   br      x6
-      dbgs() << "BOLT-DEBUG: "
-                "failed to match indirect branch: ShiftVAL != 2\n";
+      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "
+                           "failed to match indirect branch: ShiftVAL != 2\n");
       return false;
     }
 
@@ -953,14 +953,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       //  adr     x12, 0x247b30 <__gettextparse+0x5b0>
       //  add     x13, x12, w13, sxth #2
       //  br      x13
-      dbgs() << "BOLT-DEBUG: failed to match indirect branch: "
-                "nop/adr instead of adrp/add\n";
+      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: failed to match indirect branch: "
+                           "nop/adr instead of adrp/add\n");
       return false;
     }
 
     if (DefJTBaseAdd->getOpcode() != AArch64::ADDXri) {
-      dbgs() << "BOLT-DEBUG: "
-                "failed to match jump table base address pattern! (1)\n";
+      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: failed to match jump table base "
+                           "address pattern! (1)\n");
       return false;
     }
 



More information about the llvm-commits mailing list