[lld] 60e4d24 - [lld-macho,BalancedPartition] Simplify relocation hash and avoid xxHash
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 09:31:46 PST 2025
Author: Fangrui Song
Date: 2025-01-16T09:31:42-08:00
New Revision: 60e4d24963ebc256dd68f2f9d969ca8e52cd9649
URL: https://github.com/llvm/llvm-project/commit/60e4d24963ebc256dd68f2f9d969ca8e52cd9649
DIFF: https://github.com/llvm/llvm-project/commit/60e4d24963ebc256dd68f2f9d969ca8e52cd9649.diff
LOG: [lld-macho,BalancedPartition] Simplify relocation hash and avoid xxHash
xxHash, inferior to xxh3, is discouraged. We try not to use xxhash in
lld.
Switch to read32le for content hash and xxh3/stable_hash_combine for
relocation hash. Remove the intermediate std::string for relocation
hash.
Change the tail hashing scheme to consider individual bytes instead.
This helps group 0102 and 0201 together. The benefit is negligible,
though.
Pull Request: https://github.com/llvm/llvm-project/pull/121729
Added:
Modified:
lld/MachO/BPSectionOrderer.h
lld/include/lld/Common/BPSectionOrdererBase.h
Removed:
################################################################################
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 4facb652d4c874..69c6b260f044cb 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -19,7 +19,10 @@
#include "Symbols.h"
#include "lld/Common/BPSectionOrdererBase.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/xxhash.h"
namespace lld::macho {
@@ -90,23 +93,24 @@ class BPSectionMacho : public BPSectionBase {
&sectionToIdx) const override {
constexpr unsigned windowSize = 4;
- // Calculate content hashes
- size_t dataSize = isec->data.size();
- for (size_t i = 0; i < dataSize; i++) {
- auto window = isec->data.drop_front(i).take_front(windowSize);
- hashes.push_back(xxHash64(window));
- }
+ // Calculate content hashes: k-mers and the last k-1 bytes.
+ ArrayRef<uint8_t> data = isec->data;
+ if (data.size() >= windowSize)
+ for (size_t i = 0; i <= data.size() - windowSize; ++i)
+ hashes.push_back(llvm::support::endian::read32le(data.data() + i));
+ for (uint8_t byte : data.take_back(windowSize - 1))
+ hashes.push_back(byte);
// Calculate relocation hashes
for (const auto &r : isec->relocs) {
- if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
+ if (r.length == 0 || r.referent.isNull() || r.offset >= data.size())
continue;
uint64_t relocHash = getRelocHash(r, sectionToIdx);
uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
for (uint32_t i = start; i < r.offset + r.length; i++) {
- auto window = isec->data.drop_front(i).take_front(windowSize);
- hashes.push_back(xxHash64(window) + relocHash);
+ auto window = data.drop_front(i).take_front(windowSize);
+ hashes.push_back(xxh3_64bits(window) ^ relocHash);
}
}
@@ -124,19 +128,17 @@ class BPSectionMacho : public BPSectionBase {
std::optional<uint64_t> sectionIdx;
if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
sectionIdx = it->second;
- std::string kind;
+ uint64_t kind = -1, value = 0;
if (isec)
- kind = ("Section " + Twine(isec->kind())).str();
+ kind = uint64_t(isec->kind());
if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
- kind += (" Symbol " + Twine(sym->kind())).str();
- if (auto *d = llvm::dyn_cast<Defined>(sym)) {
- return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
- d->value, reloc.addend);
- }
+ kind = (kind << 8) | uint8_t(sym->kind());
+ if (auto *d = llvm::dyn_cast<Defined>(sym))
+ value = d->value;
}
- return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0), 0,
- reloc.addend);
+ return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
+ reloc.addend);
}
};
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index bd5bd638ccd2ac..bbd05edc5e55ec 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Support/xxhash.h"
#include <memory>
#include <optional>
@@ -56,14 +55,6 @@ class BPSectionBase {
return P1;
}
- static uint64_t getRelocHash(llvm::StringRef kind, uint64_t sectionIdx,
- uint64_t offset, uint64_t addend) {
- return llvm::xxHash64((kind + ": " + llvm::Twine::utohexstr(sectionIdx) +
- " + " + llvm::Twine::utohexstr(offset) + " + " +
- llvm::Twine::utohexstr(addend))
- .str());
- }
-
/// Reorders sections using balanced partitioning algorithm based on profile
/// data.
static llvm::DenseMap<const BPSectionBase *, int>
More information about the llvm-commits
mailing list