[llvm] [BOLT][AArch64] Add call relaxation pass (PR #173952)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 13:02:46 PST 2026
================
@@ -944,13 +953,306 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
}
}
+void LongJmpPass::relaxCalls(BinaryContext &BC) {
+ // Operate on a copy of binary functions. We are going to manually insert new
+ // thunks and update the list.
+ BinaryFunctionListType OutputFunctions = BC.getOutputBinaryFunctions();
+
+ // Conservatively estimate emitted function size. Assume the worst case
+ // alignment.
+ auto estimateFunctionSize = [&](const BinaryFunction &BF) -> uint64_t {
+ if (!BC.shouldEmit(BF))
+ return 0;
+ uint64_t Size = BF.estimateSize();
+ if (BF.hasValidIndex())
+ Size += BF.getAlignment();
+
+ if (BF.hasIslandsInfo()) {
+ Size += BF.getConstantIslandAlignment();
+ Size += BF.estimateConstantIslandSize();
+ }
+
+ return Size;
+ };
+
+ // Map every function to its direct callees. Note that this is different from
+ // the regular call graph as here we completely ignore indirect calls.
+ uint64_t EstimatedSize = 0;
+ DenseMap<BinaryFunction *, std::set<const MCSymbol *>> CallMap;
+ for (BinaryFunction *BF : OutputFunctions) {
+ if (!BC.shouldEmit(*BF) || BF->isPatch())
+ continue;
+
+ EstimatedSize += estimateFunctionSize(*BF);
+
+ for (const BinaryBasicBlock &BB : *BF) {
+ for (const MCInst &Inst : BB) {
+ if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
+ BC.MIB->isIndirectBranch(Inst))
+ continue;
+ const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
+ assert(TargetSymbol);
+
+ // Ignore internal calls that use basic block labels as a destination.
+ if (!BC.getFunctionForSymbol(TargetSymbol))
+ continue;
+
+ CallMap[BF].insert(TargetSymbol);
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LongJmp: estimated code size : " << EstimatedSize
+ << '\n');
+
+ // Build clusters in the order the functions will appear in the output.
+ std::vector<FunctionCluster> Clusters;
+ for (size_t Index = 0, NumFuncs = OutputFunctions.size(); Index < NumFuncs;
+ ++Index) {
+ const size_t BFIndex =
+ opts::HotFunctionsAtEnd ? NumFuncs - Index - 1 : Index;
+ BinaryFunction *BF = OutputFunctions[BFIndex];
+ if (!BC.shouldEmit(*BF) || BF->isPatch())
+ continue;
+
+ const uint64_t BFSize = estimateFunctionSize(*BF);
+ if (Clusters.empty() || Clusters.back().Size + BFSize > MaxClusterSize) {
+ Clusters.emplace_back(FunctionCluster());
+ Clusters.back().FirstFunctionIndex = BFIndex;
+ }
+
+ FunctionCluster &FC = Clusters.back();
+ FC.Functions.insert(BF);
+
+ // When a function is added to the cluster, we have to remove all of its
+ // symbols from the cluster callee list. These include alternative symbols
+ // (e.g. after ICF) and secondary entry point symbols.
+ for (const MCSymbol *Symbol : BF->getSymbols()) {
+ auto It = FC.Callees.find(Symbol);
+ if (It != FC.Callees.end())
+ FC.Callees.erase(It);
+ }
+ BF->forEachEntryPoint(
+ [&FC](uint64_t Offset, const MCSymbol *EntrySymbol) -> bool {
+ auto It = FC.Callees.find(EntrySymbol);
+ if (It != FC.Callees.end())
+ FC.Callees.erase(It);
+ return true;
+ });
+
+ // Update cluster callee list with added function callees.
+ for (const MCSymbol *CalleeSymbol : CallMap[BF]) {
+ BinaryFunction *Callee = BC.getFunctionForSymbol(CalleeSymbol);
+ if (!FC.Functions.count(Callee)) {
+ FC.Callees.insert(CalleeSymbol);
+ }
+ }
+
+ FC.Size += BFSize;
+ FC.LastFunctionIndex = BFIndex;
+ }
+
+ if (opts::HotFunctionsAtEnd) {
+ std::reverse(Clusters.begin(), Clusters.end());
+ llvm::for_each(Clusters, [](FunctionCluster &FC) {
+ std::swap(FC.LastFunctionIndex, FC.FirstFunctionIndex);
+ });
+ }
+
+ if (Clusters.empty())
+ return;
+
+ // Print cluster stats.
+ BC.outs() << "BOLT-INFO: built " << Clusters.size()
+ << " function cluster(s)\n";
+ uint64_t ClusterIndex = 0;
+ for (const FunctionCluster &FC : Clusters) {
+ BC.outs() << "BOLT-INFO: cluster: " << ClusterIndex++ << '\n'
+ << "BOLT-INFO: " << FC.Functions.size() << " function(s)\n"
+ << "BOLT-INFO: " << FC.Callees.size() << " callee(s)\n"
+ << "BOLT-INFO: " << FC.Size << " estimated bytes\n";
+ }
+
+ if (opts::RelaxPLT) {
+ // Populate one of the clusters with PLT functions based on the proximity of
+ // the PLT section to avoid unneeded thunk redirection.
+ const size_t PLTClusterNum = opts::UseOldText ? Clusters.size() - 1 : 0;
+ auto &PLTCluster = Clusters[PLTClusterNum];
+ for (BinaryFunction &BF :
+ llvm::make_second_range(BC.getBinaryFunctions())) {
+ if (BF.isPLTFunction()) {
+ PLTCluster.Functions.insert(&BF);
+ auto It = PLTCluster.Callees.find(BF.getSymbol());
+ if (It != PLTCluster.Callees.end())
+ PLTCluster.Callees.erase(It);
+ }
+ }
+ }
+
+ // Create a thunk with +-128MB span.
+ size_t NumShortThunks = 0;
+ auto createShortThunk = [&](const MCSymbol *TargetSymbol) {
+ ++NumShortThunks;
+ BinaryFunction *ThunkBF = BC.createThunkBinaryFunction(
+ "__AArch64Thunk_" + TargetSymbol->getName().str());
+ MCInst Inst;
+ BC.MIB->createTailCall(Inst, TargetSymbol, BC.Ctx.get());
+ ThunkBF->addBasicBlock()->addInstruction(Inst);
+
+ return ThunkBF;
+ };
+
+ // Create a thunk with +-4GB span.
+ size_t NumLongThunks = 0;
+ auto createLongThunk = [&](const MCSymbol *TargetSymbol) {
+ ++NumLongThunks;
+ BinaryFunction *ThunkBF = BC.createThunkBinaryFunction(
+ "__AArch64ADRPThunk_" + TargetSymbol->getName().str());
+ InstructionListType Instructions;
+ BC.MIB->createLongTailCall(Instructions, TargetSymbol, BC.Ctx.get());
+ ThunkBF->addBasicBlock()->addInstructions(Instructions);
+
+ return ThunkBF;
+ };
+
+ for (unsigned ClusterNum = 0; ClusterNum < Clusters.size(); ++ClusterNum) {
+ FunctionCluster &FC = Clusters[ClusterNum];
+ SmallVector<const MCSymbol *, 16> Callees(FC.Callees.begin(),
+ FC.Callees.end());
+
+ // Generate thunks in deterministic order.
+ llvm::sort(Callees, [&BC](const MCSymbol *A, const MCSymbol *B) {
+ uint64_t EntryA;
+ uint64_t EntryB;
+ BinaryFunction *BFA = BC.getFunctionForSymbol(A, &EntryA);
+ BinaryFunction *BFB = BC.getFunctionForSymbol(B, &EntryB);
+ if (BFA == BFB) {
+ if (EntryA != EntryB)
+ return EntryA < EntryB;
+
+ // Use lexicographical order for ICF'ed symbols.
+ return A->getName() < B->getName();
+ }
+ return compareBinaryFunctionByIndex(BFA, BFB);
+ });
+
+ // Return index of adjacent cluster containing the function.
+ auto getAdjClusterWithFunction =
+ [&](const BinaryFunction *BF) -> std::optional<unsigned> {
+ if (ClusterNum > 0 && Clusters[ClusterNum - 1].Functions.count(BF))
+ return ClusterNum - 1;
+ if (ClusterNum + 1 < Clusters.size() &&
+ Clusters[ClusterNum + 1].Functions.count(BF))
+ return ClusterNum + 1;
+ return std::nullopt;
+ };
+
+ const FunctionCluster *PrevCluster =
+ ClusterNum ? &Clusters[ClusterNum - 1] : nullptr;
+
+ // Create short thunks for callees in adjacent clusters and long thunks
+ // for callees outside.
+ for (const MCSymbol *Callee : Callees) {
+ if (FC.Thunks.count(Callee))
+ continue;
+
+ BinaryFunction *Thunk = 0;
+ std::optional<unsigned> AdjCluster =
+ getAdjClusterWithFunction(BC.getFunctionForSymbol(Callee));
+ if (AdjCluster) {
+ Thunk = createShortThunk(Callee);
+ } else {
+ // Previous cluster may already have a long thunk that can be reused.
+ if (PrevCluster) {
+ auto It = PrevCluster->Thunks.find(Callee);
+ // Reuse only if previous cluster hosts this thunk.
+ if (It != PrevCluster->Thunks.end() &&
+ llvm::is_contained(PrevCluster->ThunkList, It->second)) {
+ FC.Thunks[Callee] = It->second;
+ continue;
+ }
+ }
+ Thunk = createLongThunk(Callee);
+ }
+
+ // The cluster that will host this thunk. If the current cluster is the
+ // last one, try to use the previous one. Matters when we want to have hot
+ // functions at higher addresses under HotFunctionsAtEnd.
+ FunctionCluster *ThunkCluster = &Clusters[ClusterNum];
+ if ((AdjCluster && *AdjCluster == ClusterNum - 1) ||
+ (ClusterNum && ClusterNum == Clusters.size() - 1))
+ ThunkCluster = &Clusters[ClusterNum - 1];
+ ThunkCluster->ThunkList.push_back(Thunk);
+
+ // Register thunks for all symbols associated with the function.
+ uint64_t EntryID = 0;
+ const BinaryFunction *BF = BC.getFunctionForSymbol(Callee, &EntryID);
+ if (EntryID != 0) {
+ FC.Thunks[Callee] = Thunk;
+ } else {
+ for (const MCSymbol *Symbol : BF->getSymbols()) {
+ FC.Thunks[Symbol] = Thunk;
+ }
+ }
+ }
+ }
+
+ if (NumShortThunks)
----------------
maksfb wrote:
Will you find it useful? Normally, no thunks are created when all the code fits into a single cluster, and in that case such information becomes redundant.
https://github.com/llvm/llvm-project/pull/173952
More information about the llvm-commits
mailing list