[lld] r358536 - ELF: Move build id computation to Writer. NFCI.

Tue Apr 16 15:45:15 PDT 2019

Author: pcc
Date: Tue Apr 16 15:45:14 2019
New Revision: 358536

URL: http://llvm.org/viewvc/llvm-project?rev=358536&view=rev
Log:
ELF: Move build id computation to Writer. NFCI.

With partitions, each partition should have the same build id. This means
that the build id needs to be only computed once, otherwise we will end up
with different build ids in each partition as a result of the file contents
changing. This change moves the computation of the build id into Writer so
that it only happens once.

Differential Revision: https://reviews.llvm.org/D60342

Modified:
    lld/trunk/ELF/SyntheticSections.cpp
    lld/trunk/ELF/SyntheticSections.h
    lld/trunk/ELF/Writer.cpp

Modified: lld/trunk/ELF/SyntheticSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=358536&r1=358535&r2=358536&view=diff
==============================================================================

--- lld/trunk/ELF/SyntheticSections.cpp (original)
+++ lld/trunk/ELF/SyntheticSections.cpp Tue Apr 16 15:45:14 2019
@@ -36,9 +36,6 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
-#include "llvm/Support/RandomNumberGenerator.h"
-#include "llvm/Support/SHA1.h"
-#include "llvm/Support/xxhash.h"
 #include <cstdlib>
 #include <thread>
 
@@ -303,36 +300,9 @@ void BuildIdSection::writeTo(uint8_t *Bu
   HashBuf = Buf + 16;
 }
 
-// Split one uint8 array into small pieces of uint8 arrays.
-static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr,
-                                            size_t ChunkSize) {
-  std::vector<ArrayRef<uint8_t>> Ret;
-  while (Arr.size() > ChunkSize) {
-    Ret.push_back(Arr.take_front(ChunkSize));
-    Arr = Arr.drop_front(ChunkSize);
-  }
-  if (!Arr.empty())
-    Ret.push_back(Arr);
-  return Ret;
-}
-
-// Computes a hash value of Data using a given hash function.
-// In order to utilize multiple cores, we first split data into 1MB
-// chunks, compute a hash for each chunk, and then compute a hash value
-// of the hash values.
-void BuildIdSection::computeHash(
-    llvm::ArrayRef<uint8_t> Data,
-    std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) {
-  std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024);
-  std::vector<uint8_t> Hashes(Chunks.size() * HashSize);
-
-  // Compute hash values.
-  parallelForEachN(0, Chunks.size(), [&](size_t I) {
-    HashFn(Hashes.data() + I * HashSize, Chunks[I]);
-  });
-
-  // Write to the final output buffer.
-  HashFn(HashBuf, Hashes);
+void BuildIdSection::writeBuildId(ArrayRef<uint8_t> Buf) {
+  assert(Buf.size() == HashSize);
+  memcpy(HashBuf, Buf.data(), HashSize);
 }
 
 BssSection::BssSection(StringRef Name, uint64_t Size, uint32_t Alignment)
@@ -341,35 +311,6 @@ BssSection::BssSection(StringRef Name, u
   this->Size = Size;
 }
 
-void BuildIdSection::writeBuildId(ArrayRef<uint8_t> Buf) {
-  switch (Config->BuildId) {
-  case BuildIdKind::Fast:
-    computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
-      write64le(Dest, xxHash64(Arr));
-    });
-    break;
-  case BuildIdKind::Md5:
-    computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
-      memcpy(Dest, MD5::hash(Arr).data(), 16);
-    });
-    break;
-  case BuildIdKind::Sha1:
-    computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
-      memcpy(Dest, SHA1::hash(Arr).data(), 20);
-    });
-    break;
-  case BuildIdKind::Uuid:
-    if (auto EC = getRandomBytes(HashBuf, HashSize))
-      error("entropy source failure: " + EC.message());
-    break;
-  case BuildIdKind::Hexstring:
-    memcpy(HashBuf, Config->BuildIdVector.data(), Config->BuildIdVector.size());
-    break;
-  default:
-    llvm_unreachable("unknown BuildIdKind");
-  }
-}
-
 EhFrameSection::EhFrameSection()
     : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {}
 

Modified: lld/trunk/ELF/SyntheticSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=358536&r1=358535&r2=358536&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.h (original)
+++ lld/trunk/ELF/SyntheticSections.h Tue Apr 16 15:45:14 2019
@@ -154,9 +154,6 @@ public:
   void writeBuildId(llvm::ArrayRef<uint8_t> Buf);
 
 private:
-  void computeHash(llvm::ArrayRef<uint8_t> Buf,
-                   std::function<void(uint8_t *, ArrayRef<uint8_t>)> Hash);
-
   size_t HashSize;
   uint8_t *HashBuf;
 };

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=358536&r1=358535&r2=358536&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Tue Apr 16 15:45:14 2019
@@ -24,6 +24,9 @@
 #include "lld/Common/Threads.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/xxhash.h"
 #include <climits>
 
 using namespace llvm;
@@ -2495,12 +2498,82 @@ template <class ELFT> void Writer<ELFT>:
       Sec->writeTo<ELFT>(Out::BufferStart + Sec->Offset);
 }
 
+// Split one uint8 array into small pieces of uint8 arrays.
+static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr,
+                                            size_t ChunkSize) {
+  std::vector<ArrayRef<uint8_t>> Ret;
+  while (Arr.size() > ChunkSize) {
+    Ret.push_back(Arr.take_front(ChunkSize));
+    Arr = Arr.drop_front(ChunkSize);
+  }
+  if (!Arr.empty())
+    Ret.push_back(Arr);
+  return Ret;
+}
+
+// Computes a hash value of Data using a given hash function.
+// In order to utilize multiple cores, we first split data into 1MB
+// chunks, compute a hash for each chunk, and then compute a hash value
+// of the hash values.
+static void
+computeHash(llvm::MutableArrayRef<uint8_t> HashBuf,
+            llvm::ArrayRef<uint8_t> Data,
+            std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) {
+  std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024);
+  std::vector<uint8_t> Hashes(Chunks.size() * HashBuf.size());
+
+  // Compute hash values.
+  parallelForEachN(0, Chunks.size(), [&](size_t I) {
+    HashFn(Hashes.data() + I * HashBuf.size(), Chunks[I]);
+  });
+
+  // Write to the final output buffer.
+  HashFn(HashBuf.data(), Hashes);
+}
+
+static std::vector<uint8_t> computeBuildId(llvm::ArrayRef<uint8_t> Buf) {
+  std::vector<uint8_t> BuildId;
+  switch (Config->BuildId) {
+  case BuildIdKind::Fast:
+    BuildId.resize(8);
+    computeHash(BuildId, Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
+      write64le(Dest, xxHash64(Arr));
+    });
+    break;
+  case BuildIdKind::Md5:
+    BuildId.resize(16);
+    computeHash(BuildId, Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
+      memcpy(Dest, MD5::hash(Arr).data(), 16);
+    });
+    break;
+  case BuildIdKind::Sha1:
+    BuildId.resize(20);
+    computeHash(BuildId, Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
+      memcpy(Dest, SHA1::hash(Arr).data(), 20);
+    });
+    break;
+  case BuildIdKind::Uuid:
+    BuildId.resize(16);
+    if (auto EC = llvm::getRandomBytes(BuildId.data(), 16))
+      error("entropy source failure: " + EC.message());
+    break;
+  case BuildIdKind::Hexstring:
+    BuildId = Config->BuildIdVector;
+    break;
+  default:
+    llvm_unreachable("unknown BuildIdKind");
+  }
+  return BuildId;
+}
+
 template <class ELFT> void Writer<ELFT>::writeBuildId() {
   if (!In.BuildId || !In.BuildId->getParent())
     return;
 
   // Compute a hash of all sections of the output file.
-  In.BuildId->writeBuildId({Out::BufferStart, size_t(FileSize)});
+  std::vector<uint8_t> BuildId =
+      computeBuildId({Out::BufferStart, size_t(FileSize)});
+  In.BuildId->writeBuildId(BuildId);
 }
 
 template void elf::writeResult<ELF32LE>();