[lld] 60e4d24 - [lld-macho,BalancedPartition] Simplify relocation hash and avoid xxHash

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 09:31:46 PST 2025


Author: Fangrui Song
Date: 2025-01-16T09:31:42-08:00
New Revision: 60e4d24963ebc256dd68f2f9d969ca8e52cd9649

URL: https://github.com/llvm/llvm-project/commit/60e4d24963ebc256dd68f2f9d969ca8e52cd9649
DIFF: https://github.com/llvm/llvm-project/commit/60e4d24963ebc256dd68f2f9d969ca8e52cd9649.diff

LOG: [lld-macho,BalancedPartition] Simplify relocation hash and avoid xxHash

xxHash64 is inferior to xxh3 and its use is discouraged, so we try to
avoid it in lld.

Switch to read32le for content hash and xxh3/stable_hash_combine for
relocation hash. Remove the intermediate std::string for relocation
hash.

Change the tail hashing scheme to hash each of the last k-1 bytes
individually instead of hashing the truncated trailing windows. This
helps group tails such as 0102 and 0201 together. The benefit is
negligible, though.

Pull Request: https://github.com/llvm/llvm-project/pull/121729

Added: 
    

Modified: 
    lld/MachO/BPSectionOrderer.h
    lld/include/lld/Common/BPSectionOrdererBase.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 4facb652d4c874..69c6b260f044cb 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -19,7 +19,10 @@
 #include "Symbols.h"
 #include "lld/Common/BPSectionOrdererBase.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StableHashing.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/xxhash.h"
 
 namespace lld::macho {
 
@@ -90,23 +93,24 @@ class BPSectionMacho : public BPSectionBase {
                             &sectionToIdx) const override {
     constexpr unsigned windowSize = 4;
 
-    // Calculate content hashes
-    size_t dataSize = isec->data.size();
-    for (size_t i = 0; i < dataSize; i++) {
-      auto window = isec->data.drop_front(i).take_front(windowSize);
-      hashes.push_back(xxHash64(window));
-    }
+    // Calculate content hashes: k-mers and the last k-1 bytes.
+    ArrayRef<uint8_t> data = isec->data;
+    if (data.size() >= windowSize)
+      for (size_t i = 0; i <= data.size() - windowSize; ++i)
+        hashes.push_back(llvm::support::endian::read32le(data.data() + i));
+    for (uint8_t byte : data.take_back(windowSize - 1))
+      hashes.push_back(byte);
 
     // Calculate relocation hashes
     for (const auto &r : isec->relocs) {
-      if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
+      if (r.length == 0 || r.referent.isNull() || r.offset >= data.size())
         continue;
 
       uint64_t relocHash = getRelocHash(r, sectionToIdx);
       uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
       for (uint32_t i = start; i < r.offset + r.length; i++) {
-        auto window = isec->data.drop_front(i).take_front(windowSize);
-        hashes.push_back(xxHash64(window) + relocHash);
+        auto window = data.drop_front(i).take_front(windowSize);
+        hashes.push_back(xxh3_64bits(window) ^ relocHash);
       }
     }
 
@@ -124,19 +128,17 @@ class BPSectionMacho : public BPSectionBase {
     std::optional<uint64_t> sectionIdx;
     if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
       sectionIdx = it->second;
-    std::string kind;
+    uint64_t kind = -1, value = 0;
     if (isec)
-      kind = ("Section " + Twine(isec->kind())).str();
+      kind = uint64_t(isec->kind());
 
     if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
-      kind += (" Symbol " + Twine(sym->kind())).str();
-      if (auto *d = llvm::dyn_cast<Defined>(sym)) {
-        return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
-                                           d->value, reloc.addend);
-      }
+      kind = (kind << 8) | uint8_t(sym->kind());
+      if (auto *d = llvm::dyn_cast<Defined>(sym))
+        value = d->value;
     }
-    return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0), 0,
-                                       reloc.addend);
+    return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
+                                     reloc.addend);
   }
 };
 

diff  --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index bd5bd638ccd2ac..bbd05edc5e55ec 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -18,7 +18,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/Support/xxhash.h"
 #include <memory>
 #include <optional>
 
@@ -56,14 +55,6 @@ class BPSectionBase {
     return P1;
   }
 
-  static uint64_t getRelocHash(llvm::StringRef kind, uint64_t sectionIdx,
-                               uint64_t offset, uint64_t addend) {
-    return llvm::xxHash64((kind + ": " + llvm::Twine::utohexstr(sectionIdx) +
-                           " + " + llvm::Twine::utohexstr(offset) + " + " +
-                           llvm::Twine::utohexstr(addend))
-                              .str());
-  }
-
   /// Reorders sections using balanced partitioning algorithm based on profile
   /// data.
   static llvm::DenseMap<const BPSectionBase *, int>


        


More information about the llvm-commits mailing list