[llvm] [BOLT][AArch64] Validate code padding (PR #164037)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 17 17:30:13 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: YongKang Zhu (yozhu)

<details>
<summary>Changes</summary>

Check whether AArch64 function code padding is valid, and add
an option to treat invalid code padding as error.

---
Full diff: https://github.com/llvm/llvm-project/pull/164037.diff


7 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryFunction.h (+11-5) 
- (modified) bolt/lib/Core/BinaryContext.cpp (+79-17) 
- (modified) bolt/lib/Core/BinaryFunction.cpp (+3-4) 
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+5-1) 
- (modified) bolt/test/AArch64/Inputs/plt-gnu-ld.yaml (+7-1) 
- (added) bolt/test/AArch64/invalid-code-padding.s (+34) 
- (modified) bolt/test/AArch64/plt-got.test (+3-2) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 7b10b2d28d7e3..118d5792ceed3 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2142,8 +2142,9 @@ class BinaryFunction {
   }
 
   /// Detects whether \p Address is inside a data region in this function
-  /// (constant islands).
-  bool isInConstantIsland(uint64_t Address) const {
+  /// (constant islands), and optionally return the island size starting
+  /// from the given \p Address.
+  bool isInConstantIsland(uint64_t Address, uint64_t *Size = nullptr) const {
     if (!Islands)
       return false;
 
@@ -2161,10 +2162,15 @@ class BinaryFunction {
     DataIter = std::prev(DataIter);
 
     auto CodeIter = Islands->CodeOffsets.upper_bound(Offset);
-    if (CodeIter == Islands->CodeOffsets.begin())
+    if (CodeIter == Islands->CodeOffsets.begin() ||
+        *std::prev(CodeIter) <= *DataIter) {
+      if (Size)
+        *Size = (CodeIter == Islands->CodeOffsets.end() ? getMaxSize()
+                                                        : *CodeIter) -
+                Offset;
       return true;
-
-    return *std::prev(CodeIter) <= *DataIter;
+    }
+    return false;
   }
 
   uint16_t getConstantIslandAlignment() const;
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 46b3372642330..9ea534b72a56f 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -77,6 +77,11 @@ cl::opt<std::string> CompDirOverride(
              "location, which is used with DW_AT_dwo_name to construct a path "
              "to *.dwo files."),
     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
+
+static cl::opt<bool>
+    FailOnInvalidPadding("fail-on-invalid-padding", cl::Hidden, cl::init(false),
+                         cl::desc("treat invalid code padding as error"),
+                         cl::ZeroOrMore, cl::cat(BoltCategory));
 } // namespace opts
 
 namespace llvm {
@@ -942,8 +947,7 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
 }
 
 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
-  // FIXME: aarch64 support is missing.
-  if (!isX86())
+  if (!isX86() && !isAArch64())
     return true;
 
   if (BF.getSize() == BF.getMaxSize())
@@ -973,14 +977,26 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
     return Offset - StartOffset;
   };
 
-  // Skip a sequence of zero bytes.
+  // Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
+  // in case the following zeros belong to constant island or veneer.
   auto skipZeros = [&]() {
     const uint64_t StartOffset = Offset;
-    for (; Offset < BF.getMaxSize(); ++Offset)
-      if ((*FunctionData)[Offset] != 0)
+    uint64_t CurrentOffset = Offset;
+    for (; CurrentOffset < BF.getMaxSize() &&
+           (!isAArch64() || CurrentOffset < StartOffset + 4);
+         ++CurrentOffset)
+      if ((*FunctionData)[CurrentOffset] != 0)
         break;
 
-    return Offset - StartOffset;
+    uint64_t NumZeros = CurrentOffset - StartOffset;
+    if (isAArch64())
+      NumZeros &= ~((uint64_t)0x3);
+
+    if (NumZeros == 0)
+      return false;
+    Offset += NumZeros;
+    InstrAddress += NumZeros;
+    return true;
   };
 
   // Accept the whole padding area filled with breakpoints.
@@ -993,6 +1009,8 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
   // Some functions have a jump to the next function or to the padding area
   // inserted after the body.
   auto isSkipJump = [&](const MCInst &Instr) {
+    if (!isX86())
+      return false;
     uint64_t TargetAddress = 0;
     if (MIB->isUnconditionalBranch(Instr) &&
         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
@@ -1004,34 +1022,73 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
     return false;
   };
 
+  // For veneers that are not already covered by binary functions, only those
+  // that handleAArch64Veneer() can recognize are checked here.
+  auto skipAArch64Veneer = [&]() {
+    if (!isAArch64() || Offset >= BF.getMaxSize())
+      return false;
+    BinaryFunction *BFVeneer = getBinaryFunctionContainingAddress(InstrAddress);
+    if (BFVeneer) {
+      // A binary function may have been created to point to this veneer.
+      Offset += BFVeneer->getSize();
+      assert(Offset <= BF.getMaxSize() &&
+             "AArch64 veneeer goes past the max size of function");
+      InstrAddress += BFVeneer->getSize();
+      return true;
+    }
+    const uint64_t AArch64VeneerSize = 12;
+    if (Offset + AArch64VeneerSize <= BF.getMaxSize() &&
+        handleAArch64Veneer(InstrAddress, /*MatchOnly*/ true)) {
+      Offset += AArch64VeneerSize;
+      InstrAddress += AArch64VeneerSize;
+      this->errs() << "BOLT-WARNING: found unmarked AArch64 veneer at 0x"
+                   << Twine::utohexstr(BF.getAddress() + Offset) << '\n';
+      return true;
+    }
+    return false;
+  };
+
+  auto skipAArch64ConstantIsland = [&]() {
+    if (!isAArch64() || Offset >= BF.getMaxSize())
+      return false;
+    uint64_t Size;
+    if (BF.isInConstantIsland(InstrAddress, &Size)) {
+      Offset += Size;
+      InstrAddress += Size;
+      return true;
+    }
+    return false;
+  };
+
   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
-  while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
+  // For AArch64 also check veneers and skip constant islands.
+  while (skipAArch64Veneer() || skipAArch64ConstantIsland() ||
+         skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
          skipZeros())
     ;
 
   if (Offset == BF.getMaxSize())
     return true;
 
-  if (opts::Verbosity >= 1) {
-    this->errs() << "BOLT-WARNING: bad padding at address 0x"
-                 << Twine::utohexstr(BF.getAddress() + BF.getSize())
-                 << " starting at offset " << (Offset - BF.getSize())
-                 << " in function " << BF << '\n'
-                 << FunctionData->slice(BF.getSize(),
-                                        BF.getMaxSize() - BF.getSize())
-                 << '\n';
-  }
-
+  this->errs() << "BOLT-WARNING: bad padding at address 0x"
+               << Twine::utohexstr(BF.getAddress() + BF.getSize())
+               << " starting at offset " << (Offset - BF.getSize())
+               << " in function " << BF << '\n'
+               << FunctionData->slice(BF.getSize(),
+                                      BF.getMaxSize() - BF.getSize())
+               << '\n';
   return false;
 }
 
 void BinaryContext::adjustCodePadding() {
+  uint64_t NumInvalid = 0;
   for (auto &BFI : BinaryFunctions) {
     BinaryFunction &BF = BFI.second;
     if (!shouldEmit(BF))
       continue;
 
     if (!hasValidCodePadding(BF)) {
+      NumInvalid++;
       if (HasRelocations) {
         this->errs() << "BOLT-WARNING: function " << BF
                      << " has invalid padding. Ignoring the function\n";
@@ -1041,6 +1098,11 @@ void BinaryContext::adjustCodePadding() {
       }
     }
   }
+  if (NumInvalid && opts::FailOnInvalidPadding) {
+    this->errs() << "BOLT-ERROR: found " << NumInvalid
+                 << " instance(s) of invalid code padding\n";
+    exit(1);
+  }
 }
 
 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 4dfd4ba6f611b..776f4aed551bf 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1427,10 +1427,9 @@ Error BinaryFunction::disassemble() {
                 !(BC.isAArch64() &&
                   BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
               // Result of __builtin_unreachable().
-              LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
-                                << Twine::utohexstr(AbsoluteInstrAddr)
-                                << " in function " << *this
-                                << " : replacing with nop.\n");
+              errs() << "BOLT-WARNING: jump past end detected at 0x"
+                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+                     << *this << " : replacing with nop.\n";
               BC.MIB->createNoop(Instruction);
               if (IsCondBranch) {
                 // Register branch offset for profile validation.
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6954cb295e86a..7769162d67eaf 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -157,6 +157,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   MCPhysReg getStackPointer() const override { return AArch64::SP; }
   MCPhysReg getFramePointer() const override { return AArch64::FP; }
 
+  bool isBreakpoint(const MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::BRK;
+  }
+
   bool isPush(const MCInst &Inst) const override {
     return isStoreToStack(Inst);
   };
@@ -2153,7 +2157,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 
     --I;
     Address -= 4;
-    if (I->getOpcode() != AArch64::ADRP ||
+    if (I != Begin || I->getOpcode() != AArch64::ADRP ||
         MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() ||
         !I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16)
       return 0;
diff --git a/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml b/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml
index 1e3353acb633a..f6dc60d062ced 100644
--- a/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml
+++ b/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml
@@ -93,7 +93,7 @@ Sections:
     Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
     Address:         0x400510
     AddressAlign:    0x8
-    Content:         1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA5DFFFF97FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
+    Content:         1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA60003FD6FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
   - Name:            .rodata
     Type:            SHT_PROGBITS
     Flags:           [ SHF_ALLOC ]
@@ -435,6 +435,12 @@ Symbols:
     Binding:         STB_GLOBAL
     Value:           0x400604
     Size:            0xC4
+  - Name:            post_main
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x4006C8
+    Size:            0x7C
   - Name:            'printf@@GLIBC_2.17'
     Type:            STT_FUNC
     Binding:         STB_GLOBAL
diff --git a/bolt/test/AArch64/invalid-code-padding.s b/bolt/test/AArch64/invalid-code-padding.s
new file mode 100644
index 0000000000000..4706e600621ab
--- /dev/null
+++ b/bolt/test/AArch64/invalid-code-padding.s
@@ -0,0 +1,34 @@
+# RUN: %clang %cflags %s -o %t.so -Wl,-q
+# RUN: not llvm-bolt %t.so -o %t.bolt --fail-on-invalid-padding \
+# RUN:   2>&1 | FileCheck %s
+# CHECK: BOLT-ERROR: found 1 instance(s) of invalid code padding
+
+  .text
+  .align 2
+  .global foo
+  .type foo, %function
+foo:
+  cmp x0, x1
+  b.eq .Ltmp1
+  adrp x1, jmptbl
+  add x1, x1, :lo12:jmptbl
+  ldrsw x2, [x1, x2, lsl #2]
+  br x2
+  b .Ltmp1
+.Ltmp2:
+  add x0, x0, x1
+  ret
+  .size foo, .-foo
+
+.Ltmp1:
+  add x0, x0, x1
+  b .Ltmp2
+
+  # Dummy relocation to force relocation mode
+  .reloc 0, R_AARCH64_NONE
+
+  .section .rodata, "a"
+  .align 2
+  .global jmptbl
+jmptbl:
+  .word .text+0x28 - .
diff --git a/bolt/test/AArch64/plt-got.test b/bolt/test/AArch64/plt-got.test
index be1c095784b70..18b1ea4bf908a 100644
--- a/bolt/test/AArch64/plt-got.test
+++ b/bolt/test/AArch64/plt-got.test
@@ -1,7 +1,8 @@
 // This test checks .plt.got handling by BOLT
 
 RUN: yaml2obj %p/Inputs/plt-got.yaml &> %t.exe
-RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm --print-only=_start/1 | \
-RUN:   FileCheck %s
+RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm --print-only=_start/1 \
+RUN:   2>&1 | FileCheck %s
 
 CHECK: bl abort at PLT
+CHECK: BOLT-WARNING: found unmarked AArch64 veneer

``````````

</details>


https://github.com/llvm/llvm-project/pull/164037


More information about the llvm-commits mailing list