[llvm] [BOLT][AArch64] Add support for compact code model (PR #112110)
Paschalis Mpeis via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 23 02:07:18 PDT 2024
================
@@ -629,7 +637,271 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
return Error::success();
}
+void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
+ BinaryContext &BC = BF.getBinaryContext();
+ auto &MIB = BC.MIB;
+
+ if (!BF.isSimple())
+ return;
+
+ // Quick path.
+ if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
+ return;
+
+ auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
+ const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
+ return isIntN(Bits, Offset);
+ };
+
+ auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
+ const BinaryBasicBlock &BB) {
+ const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
+ return isBranchOffsetInRange(Inst, Offset);
+ };
+
+ // Keep track of *all* function trampolines that are going to be added to the
+ // function layout at the end of relaxation.
+ std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
+ FunctionTrampolines;
+
+ // Function fragments are relaxed independently.
+ for (FunctionFragment &FF : BF.getLayout().fragments()) {
+ // Fill out code size estimation for the fragment. Use output BB address
+ // ranges to store offsets from the start of the function.
+ uint64_t CodeSize = 0;
+ for (BinaryBasicBlock *BB : FF) {
+ BB->setOutputStartAddress(CodeSize);
+ CodeSize += BB->estimateSize();
+ BB->setOutputEndAddress(CodeSize);
+ }
+
+ // Dynamically-updated size of the fragment.
+ uint64_t FragmentSize = CodeSize;
+
+ // Size of the trampoline in bytes.
+ constexpr uint64_t TrampolineSize = 4;
+
+ // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
+ // NB: here we store only the first trampoline created for DestinationBB.
+ DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;
+
+ // Create a trampoline code after \p BB or at the end of the fragment if BB
+ // is nullptr.
+ auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
+ BinaryBasicBlock *TargetBB, uint64_t Count,
+ bool UpdateOffsets = true) {
+ std::unique_ptr<BinaryBasicBlock> TrampolineBB = BF.createBasicBlock();
+ MCInst Inst;
+ {
+ auto L = BC.scopeLock();
+ MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
+ }
+ TrampolineBB->addInstruction(Inst);
+ TrampolineBB->addSuccessor(TargetBB, Count);
+ TrampolineBB->setExecutionCount(Count);
+ const uint64_t TrampolineAddress =
+ BB ? BB->getOutputEndAddress() : FragmentSize;
+ TrampolineBB->setOutputStartAddress(TrampolineAddress);
+ TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
+ TrampolineBB->setFragmentNum(FF.getFragmentNum());
+
+ if (UpdateOffsets) {
+ FragmentSize += TrampolineSize;
+ for (BinaryBasicBlock *IBB : FF) {
+ if (IBB->getOutputStartAddress() >= TrampolineAddress) {
+ IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
+ TrampolineSize);
+ IBB->setOutputEndAddress(IBB->getOutputEndAddress() +
+ TrampolineSize);
+ }
+ }
+ for (auto &Pair : FunctionTrampolines) {
+ BinaryBasicBlock *IBB = Pair.second.get();
+ if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
+ continue;
+ if (IBB == TrampolineBB.get())
----------------
paschalis-mpeis wrote:
Hey @maksfb, sorry for any confusion; I was referring only to the `addTrampolineAfter` lambda. Something like the diff below is what I had in mind.
If the insertion into `FunctionTrampolines` happens just before the offsets are updated, could the start/end addresses of the newly added trampoline BB be adjusted in a single loop? This assumes that only the added `TrampolineBB` of the original FF needs such an adjustment.
With some quick testing, the diff seems to be OK. The tests passed (including the current and previous revisions of `compact-code-model.s`), and the more complex mongodb binary from [#99848](https://github.com/llvm/llvm-project/issues/99848) was also OK.
This was just a suggestion/observation anyway; ignore it if you think it is not always correct or if the original code is more readable.
```diff
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 279ff63faf11..cb1d556a06e5 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -702,21 +702,17 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
TrampolineBB->setFragmentNum(FF.getFragmentNum());
+ if (!FragmentTrampolines.lookup(TargetBB))
+ FragmentTrampolines[TargetBB] = TrampolineBB.get();
+ FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
+ std::move(TrampolineBB));
+
+ auto *TBB = FunctionTrampolines.back().second.get();
if (UpdateOffsets) {
FragmentSize += TrampolineSize;
- for (BinaryBasicBlock *IBB : FF) {
- if (IBB->getOutputStartAddress() >= TrampolineAddress) {
- IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
- TrampolineSize);
- IBB->setOutputEndAddress(IBB->getOutputEndAddress() +
- TrampolineSize);
- }
- }
for (auto &Pair : FunctionTrampolines) {
BinaryBasicBlock *IBB = Pair.second.get();
- if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
- continue;
- if (IBB == TrampolineBB.get())
+ if (IBB->getFragmentNum() != TBB->getFragmentNum())
continue;
if (IBB->getOutputStartAddress() >= TrampolineAddress) {
IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
@@ -726,13 +722,7 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
}
}
}
-
- if (!FragmentTrampolines.lookup(TargetBB))
- FragmentTrampolines[TargetBB] = TrampolineBB.get();
- FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
- std::move(TrampolineBB));
-
- return FunctionTrampolines.back().second.get();
+ return TBB;
};
// Pre-populate trampolines by splitting unconditional branches from the
```
https://github.com/llvm/llvm-project/pull/112110
More information about the llvm-commits
mailing list