[llvm] [BOLT][AArch64] Validate code padding (PR #164037)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 17:30:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: YongKang Zhu (yozhu)
<details>
<summary>Changes</summary>
Check whether AArch64 function code padding is valid, and add
an option to treat invalid code padding as error.
---
Full diff: https://github.com/llvm/llvm-project/pull/164037.diff
7 Files Affected:
- (modified) bolt/include/bolt/Core/BinaryFunction.h (+11-5)
- (modified) bolt/lib/Core/BinaryContext.cpp (+79-17)
- (modified) bolt/lib/Core/BinaryFunction.cpp (+3-4)
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+5-1)
- (modified) bolt/test/AArch64/Inputs/plt-gnu-ld.yaml (+7-1)
- (added) bolt/test/AArch64/invalid-code-padding.s (+34)
- (modified) bolt/test/AArch64/plt-got.test (+3-2)
``````````diff
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 7b10b2d28d7e3..118d5792ceed3 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2142,8 +2142,9 @@ class BinaryFunction {
}
/// Detects whether \p Address is inside a data region in this function
- /// (constant islands).
- bool isInConstantIsland(uint64_t Address) const {
+ /// (constant islands), and optionally return the island size starting
+ /// from the given \p Address.
+ bool isInConstantIsland(uint64_t Address, uint64_t *Size = nullptr) const {
if (!Islands)
return false;
@@ -2161,10 +2162,15 @@ class BinaryFunction {
DataIter = std::prev(DataIter);
auto CodeIter = Islands->CodeOffsets.upper_bound(Offset);
- if (CodeIter == Islands->CodeOffsets.begin())
+ if (CodeIter == Islands->CodeOffsets.begin() ||
+ *std::prev(CodeIter) <= *DataIter) {
+ if (Size)
+ *Size = (CodeIter == Islands->CodeOffsets.end() ? getMaxSize()
+ : *CodeIter) -
+ Offset;
return true;
-
- return *std::prev(CodeIter) <= *DataIter;
+ }
+ return false;
}
uint16_t getConstantIslandAlignment() const;
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 46b3372642330..9ea534b72a56f 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -77,6 +77,11 @@ cl::opt<std::string> CompDirOverride(
"location, which is used with DW_AT_dwo_name to construct a path "
"to *.dwo files."),
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
+
+static cl::opt<bool>
+ FailOnInvalidPadding("fail-on-invalid-padding", cl::Hidden, cl::init(false),
+ cl::desc("treat invalid code padding as error"),
+ cl::ZeroOrMore, cl::cat(BoltCategory));
} // namespace opts
namespace llvm {
@@ -942,8 +947,7 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
}
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
- // FIXME: aarch64 support is missing.
- if (!isX86())
+ if (!isX86() && !isAArch64())
return true;
if (BF.getSize() == BF.getMaxSize())
@@ -973,14 +977,26 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
return Offset - StartOffset;
};
- // Skip a sequence of zero bytes.
+ // Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
+ // in case the following zeros belong to constant island or veneer.
auto skipZeros = [&]() {
const uint64_t StartOffset = Offset;
- for (; Offset < BF.getMaxSize(); ++Offset)
- if ((*FunctionData)[Offset] != 0)
+ uint64_t CurrentOffset = Offset;
+ for (; CurrentOffset < BF.getMaxSize() &&
+ (!isAArch64() || CurrentOffset < StartOffset + 4);
+ ++CurrentOffset)
+ if ((*FunctionData)[CurrentOffset] != 0)
break;
- return Offset - StartOffset;
+ uint64_t NumZeros = CurrentOffset - StartOffset;
+ if (isAArch64())
+ NumZeros &= ~((uint64_t)0x3);
+
+ if (NumZeros == 0)
+ return false;
+ Offset += NumZeros;
+ InstrAddress += NumZeros;
+ return true;
};
// Accept the whole padding area filled with breakpoints.
@@ -993,6 +1009,8 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
// Some functions have a jump to the next function or to the padding area
// inserted after the body.
auto isSkipJump = [&](const MCInst &Instr) {
+ if (!isX86())
+ return false;
uint64_t TargetAddress = 0;
if (MIB->isUnconditionalBranch(Instr) &&
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
@@ -1004,34 +1022,73 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
return false;
};
+ // For veneers that are not already covered by binary functions, only those
+ // that handleAArch64Veneer() can recognize are checked here.
+ auto skipAArch64Veneer = [&]() {
+ if (!isAArch64() || Offset >= BF.getMaxSize())
+ return false;
+ BinaryFunction *BFVeneer = getBinaryFunctionContainingAddress(InstrAddress);
+ if (BFVeneer) {
+ // A binary function may have been created to point to this veneer.
+ Offset += BFVeneer->getSize();
+ assert(Offset <= BF.getMaxSize() &&
+ "AArch64 veneeer goes past the max size of function");
+ InstrAddress += BFVeneer->getSize();
+ return true;
+ }
+ const uint64_t AArch64VeneerSize = 12;
+ if (Offset + AArch64VeneerSize <= BF.getMaxSize() &&
+ handleAArch64Veneer(InstrAddress, /*MatchOnly*/ true)) {
+ Offset += AArch64VeneerSize;
+ InstrAddress += AArch64VeneerSize;
+ this->errs() << "BOLT-WARNING: found unmarked AArch64 veneer at 0x"
+ << Twine::utohexstr(BF.getAddress() + Offset) << '\n';
+ return true;
+ }
+ return false;
+ };
+
+ auto skipAArch64ConstantIsland = [&]() {
+ if (!isAArch64() || Offset >= BF.getMaxSize())
+ return false;
+ uint64_t Size;
+ if (BF.isInConstantIsland(InstrAddress, &Size)) {
+ Offset += Size;
+ InstrAddress += Size;
+ return true;
+ }
+ return false;
+ };
+
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
- while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
+ // For AArch64 also check veneers and skip constant islands.
+ while (skipAArch64Veneer() || skipAArch64ConstantIsland() ||
+ skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
skipZeros())
;
if (Offset == BF.getMaxSize())
return true;
- if (opts::Verbosity >= 1) {
- this->errs() << "BOLT-WARNING: bad padding at address 0x"
- << Twine::utohexstr(BF.getAddress() + BF.getSize())
- << " starting at offset " << (Offset - BF.getSize())
- << " in function " << BF << '\n'
- << FunctionData->slice(BF.getSize(),
- BF.getMaxSize() - BF.getSize())
- << '\n';
- }
-
+ this->errs() << "BOLT-WARNING: bad padding at address 0x"
+ << Twine::utohexstr(BF.getAddress() + BF.getSize())
+ << " starting at offset " << (Offset - BF.getSize())
+ << " in function " << BF << '\n'
+ << FunctionData->slice(BF.getSize(),
+ BF.getMaxSize() - BF.getSize())
+ << '\n';
return false;
}
void BinaryContext::adjustCodePadding() {
+ uint64_t NumInvalid = 0;
for (auto &BFI : BinaryFunctions) {
BinaryFunction &BF = BFI.second;
if (!shouldEmit(BF))
continue;
if (!hasValidCodePadding(BF)) {
+ NumInvalid++;
if (HasRelocations) {
this->errs() << "BOLT-WARNING: function " << BF
<< " has invalid padding. Ignoring the function\n";
@@ -1041,6 +1098,11 @@ void BinaryContext::adjustCodePadding() {
}
}
}
+ if (NumInvalid && opts::FailOnInvalidPadding) {
+ this->errs() << "BOLT-ERROR: found " << NumInvalid
+ << " instance(s) of invalid code padding\n";
+ exit(1);
+ }
}
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 4dfd4ba6f611b..776f4aed551bf 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1427,10 +1427,9 @@ Error BinaryFunction::disassemble() {
!(BC.isAArch64() &&
BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
// Result of __builtin_unreachable().
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
- << Twine::utohexstr(AbsoluteInstrAddr)
- << " in function " << *this
- << " : replacing with nop.\n");
+ errs() << "BOLT-WARNING: jump past end detected at 0x"
+ << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+ << *this << " : replacing with nop.\n";
BC.MIB->createNoop(Instruction);
if (IsCondBranch) {
// Register branch offset for profile validation.
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6954cb295e86a..7769162d67eaf 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -157,6 +157,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
MCPhysReg getStackPointer() const override { return AArch64::SP; }
MCPhysReg getFramePointer() const override { return AArch64::FP; }
+ bool isBreakpoint(const MCInst &Inst) const override {
+ return Inst.getOpcode() == AArch64::BRK;
+ }
+
bool isPush(const MCInst &Inst) const override {
return isStoreToStack(Inst);
};
@@ -2153,7 +2157,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
--I;
Address -= 4;
- if (I->getOpcode() != AArch64::ADRP ||
+ if (I != Begin || I->getOpcode() != AArch64::ADRP ||
MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() ||
!I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16)
return 0;
diff --git a/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml b/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml
index 1e3353acb633a..f6dc60d062ced 100644
--- a/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml
+++ b/bolt/test/AArch64/Inputs/plt-gnu-ld.yaml
@@ -93,7 +93,7 @@ Sections:
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
Address: 0x400510
AddressAlign: 0x8
- Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA5DFFFF97FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
+ Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA60003FD6FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
- Name: .rodata
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC ]
@@ -435,6 +435,12 @@ Symbols:
Binding: STB_GLOBAL
Value: 0x400604
Size: 0xC4
+ - Name: post_main
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x4006C8
+ Size: 0x7C
- Name: 'printf@@GLIBC_2.17'
Type: STT_FUNC
Binding: STB_GLOBAL
diff --git a/bolt/test/AArch64/invalid-code-padding.s b/bolt/test/AArch64/invalid-code-padding.s
new file mode 100644
index 0000000000000..4706e600621ab
--- /dev/null
+++ b/bolt/test/AArch64/invalid-code-padding.s
@@ -0,0 +1,34 @@
+# RUN: %clang %cflags %s -o %t.so -Wl,-q
+# RUN: not llvm-bolt %t.so -o %t.bolt --fail-on-invalid-padding \
+# RUN: 2>&1 | FileCheck %s
+# CHECK: BOLT-ERROR: found 1 instance(s) of invalid code padding
+
+ .text
+ .align 2
+ .global foo
+ .type foo, %function
+foo:
+ cmp x0, x1
+ b.eq .Ltmp1
+ adrp x1, jmptbl
+ add x1, x1, :lo12:jmptbl
+ ldrsw x2, [x1, x2, lsl #2]
+ br x2
+ b .Ltmp1
+.Ltmp2:
+ add x0, x0, x1
+ ret
+ .size foo, .-foo
+
+.Ltmp1:
+ add x0, x0, x1
+ b .Ltmp2
+
+ # Dummy relocation to force relocation mode
+ .reloc 0, R_AARCH64_NONE
+
+ .section .rodata, "a"
+ .align 2
+ .global jmptbl
+jmptbl:
+ .word .text+0x28 - .
diff --git a/bolt/test/AArch64/plt-got.test b/bolt/test/AArch64/plt-got.test
index be1c095784b70..18b1ea4bf908a 100644
--- a/bolt/test/AArch64/plt-got.test
+++ b/bolt/test/AArch64/plt-got.test
@@ -1,7 +1,8 @@
// This test checks .plt.got handling by BOLT
RUN: yaml2obj %p/Inputs/plt-got.yaml &> %t.exe
-RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm --print-only=_start/1 | \
-RUN: FileCheck %s
+RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm --print-only=_start/1 \
+RUN: 2>&1 | FileCheck %s
CHECK: bl abort at PLT
+CHECK: BOLT-WARNING: found unmarked AArch64 veneer
``````````
</details>
https://github.com/llvm/llvm-project/pull/164037
More information about the llvm-commits
mailing list