[lld] 2090d66 - [lld-macho] Switch to xxh3_64bits

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 19 09:58:48 PDT 2023


Author: Fangrui Song
Date: 2023-07-19T09:58:44-07:00
New Revision: 2090d66b2376f3041d8a696fcaf94266188e6e96

URL: https://github.com/llvm/llvm-project/commit/2090d66b2376f3041d8a696fcaf94266188e6e96
DIFF: https://github.com/llvm/llvm-project/commit/2090d66b2376f3041d8a696fcaf94266188e6e96.diff

LOG: [lld-macho] Switch to xxh3_64bits

xxh3 is substantially faster than xxh64.
For lld/ELF, there is substantial speedup in `.debug_str` duplicate
elimination (D154813). Use xxh3 for lld-macho as well.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D155677

Added: 
    

Modified: 
    lld/MachO/ICF.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/SyntheticSections.cpp
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 76dbd021142f65..0278bf7c6751a2 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -457,7 +457,7 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
     assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
     // Turn-on the top bit to guarantee that valid hashes have no collisions
     // with the small-integer unique IDs for ICF-ineligible sections
-    isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31);
+    isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
   });
   // Now that every input section is either hashed or marked as unique, run the
   // segregation algorithm to detect foldable subsections.

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index eb1c51cdf8ec85..8f5affb1dc21d8 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -246,7 +246,7 @@ void CStringInputSection::splitIntoPieces() {
     size_t end = s.find(0);
     if (end == StringRef::npos)
       fatal(getLocation(off) + ": string is not null terminated");
-    uint32_t hash = deduplicateLiterals ? xxHash64(s.take_front(end)) : 0;
+    uint32_t hash = deduplicateLiterals ? xxh3_64bits(s.take_front(end)) : 0;
     pieces.emplace_back(off, hash);
     size_t size = end + 1; // include null terminator
     s = s.substr(size);

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 03084576955e2b..00e97a5e972822 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1677,7 +1677,7 @@ void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
 DeduplicatedCStringSection::StringOffset
 DeduplicatedCStringSection::getStringOffset(StringRef str) const {
   // StringPiece uses 31 bits to store the hashes, so we replicate that
-  uint32_t hash = xxHash64(str) & 0x7fffffff;
+  uint32_t hash = xxh3_64bits(str) & 0x7fffffff;
   auto offset = stringOffsetMap.find(CachedHashStringRef(str, hash));
   assert(offset != stringOffsetMap.end() &&
          "Looked-up strings should always exist in section");

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 563118231d1cf1..5dc67fb4198bb9 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1190,14 +1190,14 @@ void Writer::writeUuid() {
   threadFutures.reserve(chunks.size());
   for (size_t i = 0; i < chunks.size(); ++i)
     threadFutures.emplace_back(threadPool.async(
-        [&](size_t j) { hashes[j] = xxHash64(chunks[j]); }, i));
+        [&](size_t j) { hashes[j] = xxh3_64bits(chunks[j]); }, i));
   for (std::shared_future<void> &future : threadFutures)
     future.wait();
   // Append the output filename so that identical binaries with 
diff erent names
   // don't get the same UUID.
-  hashes[chunks.size()] = xxHash64(sys::path::filename(config->finalOutput));
-  uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
-                              hashes.size() * sizeof(uint64_t)});
+  hashes[chunks.size()] = xxh3_64bits(sys::path::filename(config->finalOutput));
+  uint64_t digest = xxh3_64bits({reinterpret_cast<uint8_t *>(hashes.data()),
+                                 hashes.size() * sizeof(uint64_t)});
   uuidCommand->writeUuid(digest);
 }
 


        


More information about the llvm-commits mailing list