[llvm-branch-commits] [llvm] [BOLT] Match functions with call graph (PR #98125)
Shaw Young via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jul 9 13:52:14 PDT 2024
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/98125
>From cf32a43e7c2b04079c6123fe13df4fb7226d771f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 9 Jul 2024 10:04:25 -0700
Subject: [PATCH 1/2] Comments
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 69ea0899c5f2c..6753337c24ea7 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -501,7 +501,6 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
// Maps binary functions to adjacent functions in the FCG.
for (const BinaryFunction *CallerBF : BFs) {
- // Add all call targets to the hash map.
for (const BinaryBasicBlock &BB : CallerBF->blocks()) {
for (const MCInst &Inst : BB) {
if (!BC.MIB->isCall(Instr))
@@ -533,7 +532,8 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
}
}
- // Create mapping from neighbor hash to BFs.
+ // Using the constructed adjacent function mapping, creates mapping from
+ // neighbor hash to BFs.
std::unordered_map<uint64_t, std::vector<const BinaryFunction *>>
NeighborHashToBFs;
for (const BinaryFunction *BF : BFs) {
@@ -552,12 +552,12 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
.push_back(BF);
}
- // TODO: change call anchor PR to have this representation - we need it here
+ // TODO: note, this will be introduced in the matching functions with calls
+ // as anchors pr
DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile * YamlBF>
IdToYAMLBF;
- // TODO: change call anchor PR to have this representation - we need it here
- // Maps hashes to profiled functions.
+ // Maps YAML functions to adjacent functions in the profile FCG.
std::unordered_map<const yaml::bolt::BinaryFunctionProfile * YamlBF,
FunctionHashes>
YamlBFToHashes(BFs.size());
@@ -590,7 +590,7 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
}
}
- // Matching YAMLBF with neighbor hashes.
+ // Matches YAMLBF to BFs with neighbor hashes.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
if (YamlBF.Used)
continue;
>From ee9049fc4bd3d4203c19c9c0982a78ab3b47666f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 9 Jul 2024 13:52:05 -0700
Subject: [PATCH 2/2] Moved blended hash definition
Created using spr 1.3.4
---
bolt/include/bolt/Profile/YAMLProfileReader.h | 69 ++++++++++-
bolt/lib/Profile/StaleProfileMatching.cpp | 65 -----------
bolt/lib/Profile/YAMLProfileReader.cpp | 110 ++++++++----------
3 files changed, 119 insertions(+), 125 deletions(-)
diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h
index 36e8f8739eee1..e8a34ecad9a08 100644
--- a/bolt/include/bolt/Profile/YAMLProfileReader.h
+++ b/bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -16,6 +16,73 @@
namespace llvm {
namespace bolt {
+/// An object wrapping several components of a basic block hash. The combined
+/// (blended) hash is represented and stored as one uint64_t, while individual
+/// components are of smaller size (e.g., uint16_t or uint8_t).
+struct BlendedBlockHash {
+private:
+ using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
+ using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
+ using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
+ using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
+ using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;
+
+public:
+ explicit BlendedBlockHash() {}
+
+ explicit BlendedBlockHash(uint64_t Hash) {
+ Offset = Bitfield::get<ValueOffset>(Hash);
+ OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
+ InstrHash = Bitfield::get<ValueInstr>(Hash);
+ PredHash = Bitfield::get<ValuePred>(Hash);
+ SuccHash = Bitfield::get<ValueSucc>(Hash);
+ }
+
+ /// Combine the blended hash into uint64_t.
+ uint64_t combine() const {
+ uint64_t Hash = 0;
+ Bitfield::set<ValueOffset>(Hash, Offset);
+ Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
+ Bitfield::set<ValueInstr>(Hash, InstrHash);
+ Bitfield::set<ValuePred>(Hash, PredHash);
+ Bitfield::set<ValueSucc>(Hash, SuccHash);
+ return Hash;
+ }
+
+ /// Compute a distance between two given blended hashes. The smaller the
+ /// distance, the more similar two blocks are. For identical basic blocks,
+ /// the distance is zero.
+ uint64_t distance(const BlendedBlockHash &BBH) const {
+ assert(OpcodeHash == BBH.OpcodeHash &&
+ "incorrect blended hash distance computation");
+ uint64_t Dist = 0;
+ // Account for NeighborHash
+ Dist += SuccHash == BBH.SuccHash ? 0 : 1;
+ Dist += PredHash == BBH.PredHash ? 0 : 1;
+ Dist <<= 16;
+ // Account for InstrHash
+ Dist += InstrHash == BBH.InstrHash ? 0 : 1;
+ Dist <<= 16;
+ // Account for Offset
+ Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
+ return Dist;
+ }
+
+ /// The offset of the basic block from the function start.
+ uint16_t Offset{0};
+ /// (Loose) Hash of the basic block instructions, excluding operands.
+ uint16_t OpcodeHash{0};
+ /// (Strong) Hash of the basic block instructions, including opcodes and
+ /// operands.
+ uint16_t InstrHash{0};
+ /// (Loose) Hashes of the predecessors of the basic block.
+ uint8_t PredHash{0};
+ /// (Loose) Hashes of the successors of the basic block.
+ uint8_t SuccHash{0};
+};
+
+struct CallGraphMatcher {};
+
class YAMLProfileReader : public ProfileReaderBase {
public:
explicit YAMLProfileReader(StringRef Filename)
@@ -92,7 +159,7 @@ class YAMLProfileReader : public ProfileReaderBase {
/// Matches functions using exact hash.
size_t matchWithHash(BinaryContext &BC);
-
+
/// Matches functions using the call graph.
size_t matchWithCallGraph(BinaryContext &BC);
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index e473beb2fad8c..9631ac176554e 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -121,71 +121,6 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
namespace llvm {
namespace bolt {
-/// An object wrapping several components of a basic block hash. The combined
-/// (blended) hash is represented and stored as one uint64_t, while individual
-/// components are of smaller size (e.g., uint16_t or uint8_t).
-struct BlendedBlockHash {
-private:
- using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
- using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
- using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
- using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
- using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;
-
-public:
- explicit BlendedBlockHash() {}
-
- explicit BlendedBlockHash(uint64_t Hash) {
- Offset = Bitfield::get<ValueOffset>(Hash);
- OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
- InstrHash = Bitfield::get<ValueInstr>(Hash);
- PredHash = Bitfield::get<ValuePred>(Hash);
- SuccHash = Bitfield::get<ValueSucc>(Hash);
- }
-
- /// Combine the blended hash into uint64_t.
- uint64_t combine() const {
- uint64_t Hash = 0;
- Bitfield::set<ValueOffset>(Hash, Offset);
- Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
- Bitfield::set<ValueInstr>(Hash, InstrHash);
- Bitfield::set<ValuePred>(Hash, PredHash);
- Bitfield::set<ValueSucc>(Hash, SuccHash);
- return Hash;
- }
-
- /// Compute a distance between two given blended hashes. The smaller the
- /// distance, the more similar two blocks are. For identical basic blocks,
- /// the distance is zero.
- uint64_t distance(const BlendedBlockHash &BBH) const {
- assert(OpcodeHash == BBH.OpcodeHash &&
- "incorrect blended hash distance computation");
- uint64_t Dist = 0;
- // Account for NeighborHash
- Dist += SuccHash == BBH.SuccHash ? 0 : 1;
- Dist += PredHash == BBH.PredHash ? 0 : 1;
- Dist <<= 16;
- // Account for InstrHash
- Dist += InstrHash == BBH.InstrHash ? 0 : 1;
- Dist <<= 16;
- // Account for Offset
- Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
- return Dist;
- }
-
- /// The offset of the basic block from the function start.
- uint16_t Offset{0};
- /// (Loose) Hash of the basic block instructions, excluding operands.
- uint16_t OpcodeHash{0};
- /// (Strong) Hash of the basic block instructions, including opcodes and
- /// operands.
- uint16_t InstrHash{0};
- /// (Loose) Hashes of the predecessors of the basic block.
- uint8_t PredHash{0};
- /// (Loose) Hashes of the successors of the basic block.
- uint8_t SuccHash{0};
-};
-
/// The object is used to identify and match basic blocks in a BinaryFunction
/// given their hashes computed on a binary built from several revisions behind
/// release.
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 6753337c24ea7..2cab5c385c3c9 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -458,58 +458,53 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
struct FunctionHashes {
uint64_t Hash{0};
uint64_t AdjacentFunctionHash{0};
- std::set<uint64_t> AdjacentFunctionHashesSet;
+ std::vector<uint64_t> AdjacentFunctionHashesSet;
};
size_t MatchedWithCallGraph = 0;
std::vector<BinaryFunction *> BFs = BC.getAllBinaryFunctions();
- std::unordered_map<const BinaryFunction *, FunctionHashes> BFToHashes(
- BFs.size());
+ std::unordered_map<BinaryFunction *, FunctionHashes> BFToHashes(BFs.size());
// Computes the loose hash, as in the opcode hash, of a binary function.
auto ComputeBFLooseHash = [&](const BinaryFunction *BF) {
- std::string FunctionHashStr = std::accumulate(
- BF->begin(), BF->end(), std::string(""),
- [&](std::string Accum, const BinaryBasicBlock &BB) {
- std::string BlockHashStr = hashBlockLoose(BC, BB);
- uint16_t OpcodeHash;
- if (YamlBP.Header.HashFunction == HashFunction::StdHash)
- OpcodeHash =
- (uint16_t)hash_value(std::hash<std::string>{}(BlockHashStr));
- else if (YamlBP.Header.HashFunction == HashFunction::XXH3)
- OpcodeHash = (uint16_t)llvm::xxh3_64bits(BlockHashStr);
- else
- llvm_unreachable("Unsupported hash function");
- return std::move(Accum) + std::to_string(OpcodeHash);
- });
-
+ std::string FunctionHashStr;
+ for (const BinaryBasicBlock &BB : *BF) {
+ std::string BlockHashStr = hashBlockLoose(BC, BB);
+ uint16_t OpcodeHash;
+ if (YamlBP.Header.HashFunction == HashFunction::StdHash)
+ OpcodeHash =
+ (uint16_t)hash_value(std::hash<std::string>{}(BlockHashStr));
+ else if (YamlBP.Header.HashFunction == HashFunction::XXH3)
+ OpcodeHash = (uint16_t)llvm::xxh3_64bits(BlockHashStr);
+ else
+ llvm_unreachable("Unsupported hash function");
+ FunctionHashStr.append(std::to_string(OpcodeHash));
+ }
return std::hash<std::string>{}(FunctionHashStr);
};
// Computes the loose hash of a function profile.
auto ComputeYamlBFLooseHash =
[&](const yaml::bolt::BinaryFunctionProfile &YamlBF) {
- std::string HashStr = std::to_string(
- YamlBF.Blocks.begin(), YamlBF.Blocks.end(), std::string(""),
- [&](std::string Accum,
- const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
- BlendedBlockHash YamlHash(YamlBB.Hash);
- return std::move(Accum) + std::to_string(YamlHash.OpcodeHash);
- });
-
+ std::string HashStr;
+ for (auto &YamlBB : YamlBF.Blocks) {
+ BlendedBlockHash YamlHash(YamlBB.Hash);
+ HashStr.append(std::to_string(YamlHash.OpcodeHash));
+ }
return std::hash<std::string>{}(HashStr);
};
// Maps binary functions to adjacent functions in the FCG.
- for (const BinaryFunction *CallerBF : BFs) {
+ for (BinaryFunction *CallerBF : BFs) {
for (const BinaryBasicBlock &BB : CallerBF->blocks()) {
- for (const MCInst &Inst : BB) {
+ for (const MCInst &Instr : BB) {
if (!BC.MIB->isCall(Instr))
continue;
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
if (!CallSymbol)
continue;
- BinaryData *BD =
- BC.getBinaryDataByName(CallSymbol->getName()) if (!BD) continue;
+ BinaryData *BD = BC.getBinaryDataByName(CallSymbol->getName());
+ if (!BD)
+ continue;
BinaryFunction *CalleeBF = BC.getFunctionForSymbol(BD->getSymbol());
if (!CalleeBF)
continue;
@@ -524,9 +519,9 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
CallerBFIt->second = FunctionHashes();
CallerBFIt->second.Hash = ComputeBFLooseHash(CallerBF);
}
- CalleeBFIt->second.AdjacentFunctionHashesSet.insert(
+ CalleeBFIt->second.AdjacentFunctionHashesSet.push_back(
CallerBFIt->second.Hash);
- CallerBFIt->second.AdjacentFunctionHashesSet.insert(
+ CallerBFIt->second.AdjacentFunctionHashesSet.push_back(
CalleeBFIt->second.Hash);
}
}
@@ -534,13 +529,13 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
// Using the constructed adjacent function mapping, creates mapping from
// neighbor hash to BFs.
- std::unordered_map<uint64_t, std::vector<const BinaryFunction *>>
- NeighborHashToBFs;
- for (const BinaryFunction *BF : BFs) {
+ std::unordered_map<uint64_t, std::vector<BinaryFunction *>> NeighborHashToBFs;
+ for (BinaryFunction *BF : BFs) {
auto It = BFToHashes.find(BF);
assert(It != BFToHashes.end() && "All BFs should be processed");
FunctionHashes &FunctionHashes = It->second;
-
+ std::sort(FunctionHashes.AdjacentFunctionHashesSet.begin(),
+ FunctionHashes.AdjacentFunctionHashesSet.end());
std::string AdjacentFunctionHashStr =
std::accumulate(FunctionHashes.AdjacentFunctionHashesSet.begin(),
FunctionHashes.AdjacentFunctionHashesSet.end(),
@@ -554,38 +549,34 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
// TODO: note, this will be introduced in the matching functions with calls
// as anchors pr
- DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile * YamlBF>
- IdToYAMLBF;
+ DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile *> IdToYAMLBF;
// Maps YAML functions to adjacent functions in the profile FCG.
- std::unordered_map<const yaml::bolt::BinaryFunctionProfile * YamlBF,
- FunctionHashes>
+ std::unordered_map<const yaml::bolt::BinaryFunctionProfile *, FunctionHashes>
YamlBFToHashes(BFs.size());
for (const auto &CallerYamlBF : YamlBP.Functions) {
- if (YamlBF.Used)
- continue;
- for (const auto &YamlBB : CallerYamlBF.BasicBlocks) {
- for (const auto &CallSite : YamlBB.Callsites) {
+ for (const auto &YamlBB : CallerYamlBF.Blocks) {
+ for (const auto &CallSite : YamlBB.CallSites) {
auto IdToYAMLBFIt = IdToYAMLBF.find(CallSite.DestId);
if (IdToYAMLBFIt == IdToYAMLBF.end())
continue;
- auto CalleeBFIt = YamlBFToHashes.find(IdToYAMLBFIt->second);
- if (CalleeBFIt == YamlBFToHashes.end()) {
- CalleeBFIt->second = FunctionHashes();
- CalleeBFIt->second.Hash =
+ auto CalleeYamlBFIt = YamlBFToHashes.find(IdToYAMLBFIt->second);
+ if (CalleeYamlBFIt == YamlBFToHashes.end()) {
+ CalleeYamlBFIt->second = FunctionHashes();
+ CalleeYamlBFIt->second.Hash =
ComputeYamlBFLooseHash(*IdToYAMLBFIt->second);
}
- auto CallerBFIt = YamlBFToHashes.find(&CallerYamlBF);
- if (CallerBFIt == YamlBFToHashes.end()) {
- CallerBFIt->second = FunctionHashes();
- CallerBFIt->second.Hash = ComputeYamlBFLooseHash(CallerBF);
+ auto CallerYamlBFIt = YamlBFToHashes.find(&CallerYamlBF);
+ if (CallerYamlBFIt == YamlBFToHashes.end()) {
+ CallerYamlBFIt->second = FunctionHashes();
+ CallerYamlBFIt->second.Hash = ComputeYamlBFLooseHash(CallerYamlBF);
}
- CalleeBFIt->second.AdjacentFunctionHashesSet.insert(
- CallerBFIt->second.Hash);
- CallerBFIt->second.AdjacentFunctionHashesSet.insert(
- CalleeBFIt->second.Hash);
+ CalleeYamlBFIt->second.AdjacentFunctionHashesSet.push_back(
+ CallerYamlBFIt->second.Hash);
+ CallerYamlBFIt->second.AdjacentFunctionHashesSet.push_back(
+ CalleeYamlBFIt->second.Hash);
}
}
}
@@ -599,7 +590,8 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
assert(It != YamlBFToHashes.end() &&
"All unused functions should be processed");
FunctionHashes &FunctionHashes = It->second;
-
+ std::sort(FunctionHashes.AdjacentFunctionHashesSet.begin(),
+ FunctionHashes.AdjacentFunctionHashesSet.end());
std::string AdjacentFunctionHashStr =
std::accumulate(FunctionHashes.AdjacentFunctionHashesSet.begin(),
FunctionHashes.AdjacentFunctionHashesSet.end(),
@@ -614,7 +606,7 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
if (NeighborHashToBFsIt == NeighborHashToBFs.end())
continue;
- for (const BinaryFunction *BF : NeighborHashToBFsIt->second) {
+ for (BinaryFunction *BF : NeighborHashToBFsIt->second) {
if (!ProfiledFunctions.count(BF) && profileMatches(YamlBF, *BF)) {
matchProfileToFunction(YamlBF, *BF);
++MatchedWithCallGraph;
@@ -668,7 +660,7 @@ size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
// Maps namespaces to BFs excluding binary functions with no equal sized
// profiled functions belonging to the same namespace.
- / for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+ for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
std::string DemangledName = BF->getDemangledName();
std::string Namespace = DeriveNameSpace(DemangledName);
More information about the llvm-branch-commits
mailing list