[lld] c7af9ae - Reapply: [WebAssembly] Implement build-id feature

Derek Schuff via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 3 17:19:20 PST 2023


Author: Derek Schuff
Date: 2023-03-03T17:15:55-08:00
New Revision: c7af9ae577bb04c5fe120fc07844a500818c8f47

URL: https://github.com/llvm/llvm-project/commit/c7af9ae577bb04c5fe120fc07844a500818c8f47
DIFF: https://github.com/llvm/llvm-project/commit/c7af9ae577bb04c5fe120fc07844a500818c8f47.diff

LOG: Reapply: [WebAssembly] Implement build-id feature

Implement the --build-id flag similarly to ELF, and generate a
build_id section according to the WebAssembly tool convention
specified in https://github.com/WebAssembly/tool-conventions/pull/183

The default style ("fast" aka "tree") hashes the contents of the
output and (unlike ELF) generates a v5 UUID based on the hash (using a
random namespace). It also supports generating a random v4 UUID, a
sha1 hash, and a user-specified string (as ELF does).

Differential Revision: https://reviews.llvm.org/D107662

Fix the MSVC build by calling std::copy on the underlying buffer rather
than copying directly from std::array to llvm::MutableArrayRef.

Added: 
    lld/test/wasm/build-id.test

Modified: 
    lld/wasm/Config.h
    lld/wasm/Driver.cpp
    lld/wasm/Options.td
    lld/wasm/SyntheticSections.cpp
    lld/wasm/SyntheticSections.h
    lld/wasm/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/test/wasm/build-id.test b/lld/test/wasm/build-id.test
new file mode 100644
index 0000000000000..a15f4cedcdddd
--- /dev/null
+++ b/lld/test/wasm/build-id.test
@@ -0,0 +1,60 @@
+# Checks that wasm-ld emits a build_id section for every supported
+# --build-id style, and emits none when no build ID is requested.
+
+# RUN: llvm-mc -filetype=obj -triple=wasm32 %p/Inputs/start.s -o %t
+
+# A bare --build-id and --build-id=fast must produce the same ID, and the
+# result must not depend on the number of threads.
+# RUN: wasm-ld --build-id %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: wasm-ld  --build-id=fast %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: wasm-ld  --build-id %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+
+# The sha1 style, with and without multithreading.
+# RUN: wasm-ld  --build-id=sha1 %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+# RUN: wasm-ld  --build-id=sha1 %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# "tree" is a synonym for "sha1", so it is held to the same expected output.
+# RUN: wasm-ld  --build-id=tree %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+# RUN: wasm-ld  --build-id=tree %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# RUN: wasm-ld  --build-id=uuid %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=UUID %s
+
+# A user-supplied hex string is stored as given.
+# RUN: wasm-ld  --build-id=0x12345678 %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=HEX %s
+
+# Without any --build-id option no section is emitted.
+# RUN: wasm-ld  %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+
+# When the option is repeated, the last occurrence wins.
+# RUN: wasm-ld  --build-id=sha1 --build-id=none %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+# RUN: wasm-ld  --build-id --build-id=none %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+# RUN: wasm-ld  --build-id=none --build-id %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+
+# NOTE(review): no command above assembles this file itself (%s is only handed
+# to FileCheck; the linked object comes from Inputs/start.s), so the directives
+# below look unused -- confirm before relying on them.
+.section .data.foo,"",@
+        .globl  foo
+        .hidden  foo
+        .p2align        2
+foo:
+        .int32  1
+        .size   foo, 4
+
+
+# Section contents start with the size byte: 0x10 = 16 bytes of ID.
+# DEFAULT:      Contents of section build_id:
+# DEFAULT-NEXT: 0000 10cdbf99 f76b1f5e ebb2f36a 1bde1d6c  .....k.^...j...l
+# DEFAULT-NEXT: 0010 01
+
+# Size byte 0x14 = 20 bytes of ID.
+# SHA1:      Contents of section build_id:
+# SHA1-NEXT: 0000 14ad22e8 54d72438 94af85de 3c5592bd  ..".T.$8....<U..
+# SHA1-NEXT: 0010 1b5ec96f 6b                          .^.ok
+
+# The uuid style is random, so only the leading size byte (0x10) is checked.
+# UUID:      Contents of section build_id:
+# UUID-NEXT: 0000 10
+
+# Size byte 0x04, then the four bytes given on the command line.
+# HEX:      Contents of section build_id:
+# HEX-NEXT:  0000 04123456 78                          ..4Vx
+
+
+# NONE-NOT: Contents of section build_id:

diff  --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 87217dddcb1eb..ea19a21172bb1 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -26,6 +26,9 @@ namespace wasm {
 // For --unresolved-symbols.
 enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
 
+// For --build-id: selects how the contents of the build_id section are
+// produced.
+enum class BuildIdKind {
+  None,      // No build_id section is emitted.
+  Fast,      // 16-byte v5 UUID derived from an xxHash64 tree hash of the output.
+  Sha1,      // 20-byte SHA-1 tree hash of the output.
+  Hexstring, // Bytes supplied by the user as --build-id=0x<hexstring>.
+  Uuid       // 16-byte random v4 UUID.
+};
+
 // This struct contains the global configuration for the linker.
 // Most fields are direct mapping from the command line options
 // and such fields have the same name as the corresponding options.
@@ -72,6 +75,7 @@ struct Configuration {
   llvm::StringRef thinLTOJobs;
   bool ltoDebugPassManager;
   UnresolvedPolicy unresolvedSymbols;
+  BuildIdKind buildId = BuildIdKind::None;
 
   llvm::StringRef entry;
   llvm::StringRef mapFile;
@@ -85,6 +89,7 @@ struct Configuration {
   llvm::CachePruningPolicy thinLTOCachePolicy;
   std::optional<std::vector<std::string>> features;
   std::optional<std::vector<std::string>> extraFeatures;
+  llvm::SmallVector<uint8_t, 0> buildIdVector;
 
   // The following config options do not directly correspond to any
   // particular command line options.

diff  --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 709fd9923b531..ea9299681d87f 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -385,6 +385,33 @@ static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &args) {
   return errorOrWarn;
 }
 
+// Parse --build-id or --build-id=<style>; when the option is repeated, the
+// last occurrence decides. "tree" is accepted as a synonym for "sha1" because
+// all our hash functions, including --build-id=sha1, are actually tree hashes
+// for performance reasons. Returns the selected kind plus, for the
+// 0x<hexstring> form, the decoded ID bytes (empty for every other kind).
+static std::pair<BuildIdKind, SmallVector<uint8_t, 0>>
+getBuildId(opt::InputArgList &args) {
+  auto *lastArg = args.getLastArg(OPT_build_id, OPT_build_id_eq);
+  if (!lastArg)
+    return {BuildIdKind::None, {}};
+
+  // The bare flag carries no value; it means the same as --build-id=fast.
+  const bool isBareFlag = lastArg->getOption().getID() == OPT_build_id;
+  const StringRef style =
+      isBareFlag ? StringRef("fast") : StringRef(lastArg->getValue());
+
+  if (style == "fast")
+    return {BuildIdKind::Fast, {}};
+  if (style == "uuid")
+    return {BuildIdKind::Uuid, {}};
+  if (style == "sha1" || style == "tree")
+    return {BuildIdKind::Sha1, {}};
+  if (style.startswith("0x"))
+    return {BuildIdKind::Hexstring, parseHex(style.substr(2))};
+
+  // Anything other than an explicit "none" is reported as an error; in both
+  // cases no build ID is requested.
+  if (style != "none")
+    error("unknown --build-id style: " + style);
+  return {BuildIdKind::None, {}};
+}
+
 // Initializes Config members by the command line options.
 static void readConfigs(opt::InputArgList &args) {
   config->bsymbolic = args.hasArg(OPT_Bsymbolic);
@@ -519,6 +546,8 @@ static void readConfigs(opt::InputArgList &args) {
 
   if (args.hasArg(OPT_print_map))
     config->mapFile = "-";
+
+  std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
 }
 
 // Some Config members do not directly correspond to any particular

diff  --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index b30ae99e6eb0f..5fe1e7e5c55e7 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -42,6 +42,11 @@ def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">
 
 def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
 
+// --build-id without a value selects the default ("fast") style.
+def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;
+
+// The WebAssembly linker stores the build ID in a custom section rather than
+// in a note, and "none" is an accepted value that disables it.
+def build_id_eq: J<"build-id=">, HelpText<"Generate build ID section">,
+  MetaVarName<"[none,fast,sha1,uuid,0x<hexstring>]">;
+
 defm color_diagnostics: B<"color-diagnostics",
   "Alias for --color-diagnostics=always",
   "Alias for --color-diagnostics=never">;

diff  --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 5808ebb8da3d0..85d1a99e88b5a 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -888,5 +888,39 @@ void RelocSection::writeBody() {
   sec->writeRelocations(bodyOutputStream);
 }
 
+// Returns the number of build ID bytes the configured --build-id style
+// produces: 16 for the UUID-shaped styles, 20 for SHA-1, the length of the
+// user-supplied byte string for the hex form, and 0 when disabled.
+static size_t getHashSize() {
+  switch (config->buildId) {
+  case BuildIdKind::Fast:
+  case BuildIdKind::Uuid:
+    return 16;
+  case BuildIdKind::Sha1:
+    return 20;
+  case BuildIdKind::Hexstring:
+    return config->buildIdVector.size();
+  case BuildIdKind::None:
+    break;
+  }
+  // Returning here rather than inside the switch guarantees that control never
+  // falls off the end of this non-void function, while the switch itself
+  // still lists every enumerator so a newly added kind triggers -Wswitch.
+  return 0;
+}
+
+// The ID length is fixed at construction time from the current configuration.
+BuildIdSection::BuildIdSection()
+    : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, buildIdSectionName),
+      hashSize(getHashSize()) {}
+
+// Emits the section payload: the ULEB128-encoded ID length followed by
+// hashSize placeholder bytes (spaces) that writeBuildId() overwrites later.
+void BuildIdSection::writeBody() {
+  LLVM_DEBUG(llvm::dbgs() << "BuildId writebody\n");
+  auto &os = bodyOutputStream;
+
+  // Length prefix first.
+  writeUleb128(os, hashSize, "build id size");
+
+  // Then reserve room for the ID itself.
+  const std::vector<char> placeholder(hashSize, ' ');
+  writeBytes(os, placeholder.data(), hashSize, "placeholder");
+}
+
+// Copies the final build ID over the placeholder bytes in the output buffer.
+// `buf` must hold exactly hashSize bytes, and writeTo() must already have run
+// so that the placeholder location is known.
+void BuildIdSection::writeBuildId(llvm::ArrayRef<uint8_t> buf) {
+  assert(buf.size() == hashSize);
+  // hashPlaceholderPtr stays null until writeTo() records the location.
+  assert(hashPlaceholderPtr && "build ID written before the section itself");
+  LLVM_DEBUG(dbgs() << "buildid write " << buf.size() << " "
+                    << hashPlaceholderPtr << '\n');
+  memcpy(hashPlaceholderPtr, buf.data(), hashSize);
+}
+
 } // namespace wasm
 } // namespace lld

diff  --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index bda3f8eacd819..f4e990429bb9c 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -429,6 +429,35 @@ class RelocSection : public SyntheticSection {
   OutputSection *sec;
 };
 
+// Synthetic custom section ("build_id") holding the build ID. The section is
+// first written with placeholder bytes; the real ID is patched in afterwards
+// through writeBuildId(), because it is derived from the finished output.
+class BuildIdSection : public SyntheticSection {
+public:
+  BuildIdSection();
+  void writeBody() override;
+  // The section is emitted only when a build ID style has been selected.
+  bool isNeeded() const override {
+    return config->buildId != BuildIdKind::None;
+  }
+  // Overwrites the placeholder with `buf`, which must hold exactly hashSize
+  // bytes. Only meaningful after writeTo() has recorded the placeholder
+  // location.
+  void writeBuildId(llvm::ArrayRef<uint8_t> buf);
+  void writeTo(uint8_t *buf) override {
+    LLVM_DEBUG(llvm::dbgs()
+               << "BuildId writeto buf " << buf << " offset " << offset
+               << " headersize " << header.size() << '\n');
+    // The actual build ID is derived from a hash of all of the output
+    // sections, so it can't be calculated until they are written. Here
+    // we write the section with placeholder bytes where the ID will go.
+    SyntheticSection::writeTo(buf);
+    // The ID length is stored as ULEB128, i.e. one byte per 7 bits of value.
+    // The hashed styles always fit in a single byte, but a user-supplied hex
+    // string of 128 bytes or more needs a longer size field.
+    size_t sizeFieldLen = 1;
+    for (uint32_t rest = hashSize >> 7; rest != 0; rest >>= 7)
+      ++sizeFieldLen;
+    // Calculate and store the location where the ID will be written.
+    // sizeof(buildIdSectionName) includes the terminating NUL; that extra byte
+    // presumably accounts for the one-byte length prefix stored in front of
+    // the section name -- confirm against SyntheticSection.
+    hashPlaceholderPtr = buf + offset + header.size() +
+                         sizeof(buildIdSectionName) /* name string */ +
+                         sizeFieldLen /* hash size */;
+  }
+
+  // Number of build ID bytes; fixed when the section is constructed.
+  const uint32_t hashSize;
+
+private:
+  static constexpr char buildIdSectionName[] = "build_id";
+  // Where the ID goes inside the output buffer; null until writeTo() runs.
+  uint8_t *hashPlaceholderPtr = nullptr;
+};
+
 // Linker generated output sections
 struct OutStruct {
   DylinkSection *dylinkSec;
@@ -447,6 +476,7 @@ struct OutStruct {
   NameSection *nameSec;
   ProducersSection *producersSec;
   TargetFeaturesSection *targetFeaturesSec;
+  BuildIdSection *buildIdSec;
 };
 
 extern OutStruct out;

diff  --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 339448af6da24..e82d70c9678d9 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -17,8 +17,10 @@
 #include "SymbolTable.h"
 #include "SyntheticSections.h"
 #include "WriterUtils.h"
+#include "lld/Common/Arrays.h"
 #include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -30,6 +32,9 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/xxhash.h"
 
 #include <cstdarg>
 #include <map>
@@ -103,6 +108,7 @@ class Writer {
 
   void writeHeader();
   void writeSections();
+  void writeBuildId();
 
   uint64_t fileSize = 0;
 
@@ -219,6 +225,91 @@ void Writer::writeSections() {
   });
 }
 
+// Hashes `data` with `hashFn` and stores the digest in `hashBuf`.
+// In order to utilize multiple cores, the input is first split into 1MB
+// chunks and a digest is computed for each chunk; the final digest is the
+// hash of the concatenated per-chunk digests. Every call to `hashFn` is given
+// a destination with room for hashBuf.size() bytes.
+static void
+computeHash(llvm::MutableArrayRef<uint8_t> hashBuf,
+            llvm::ArrayRef<uint8_t> data,
+            std::function<void(uint8_t *dest, ArrayRef<uint8_t> arr)> hashFn) {
+  const size_t digestSize = hashBuf.size();
+  std::vector<ArrayRef<uint8_t>> pieces = split(data, 1024 * 1024);
+  std::vector<uint8_t> digests(pieces.size() * digestSize);
+
+  // First level: one digest per chunk, each written to its own slot.
+  parallelFor(0, pieces.size(), [&](size_t idx) {
+    uint8_t *slot = digests.data() + idx * digestSize;
+    hashFn(slot, pieces[idx]);
+  });
+
+  // Second level: hash the chunk digests into the caller's buffer.
+  hashFn(hashBuf.data(), digests);
+}
+
+// Fills `output` (which must be 16 bytes long) with a UUID of the requested
+// version:
+//  - version 5: the first 16 bytes of SHA-1 over a fixed namespace UUID
+//    followed by `fileHash`;
+//  - version 4: random bytes (`fileHash` is not used).
+// The version and variant bit fields are then stamped over the result.
+static void makeUUID(unsigned version, llvm::ArrayRef<uint8_t> fileHash,
+                     llvm::MutableArrayRef<uint8_t> output) {
+  // Parenthesized so the message string attaches to the whole condition;
+  // && binds more tightly than ||.
+  assert((version == 4 || version == 5) && "Unknown UUID version");
+  assert(output.size() == 16 && "Wrong size for UUID output");
+  if (version == 5) {
+    // Build a valid v5 UUID from a hardcoded (randomly-generated) namespace
+    // UUID, and the computed hash of the output.
+    std::array<uint8_t, 16> namespaceUUID{0xA1, 0xFA, 0x48, 0x2D, 0x0E, 0x22,
+                                          0x03, 0x8D, 0x33, 0x8B, 0x52, 0x1C,
+                                          0xD6, 0xD2, 0x12, 0xB2};
+    SHA1 sha;
+    sha.update(namespaceUUID);
+    sha.update(fileHash);
+    auto s = sha.final();
+    // Copy through the raw buffer: only the first output.size() bytes of the
+    // digest are kept.
+    std::copy(s.data(), &s.data()[output.size()], output.data());
+  } else if (version == 4) {
+    if (auto ec = llvm::getRandomBytes(output.data(), output.size()))
+      error("entropy source failure: " + ec.message());
+  }
+  // Set the UUID version and variant fields.
+  // The version is the upper nibble of byte 6 (0b0101xxxx or 0b0100xxxx)
+  output[6] = (static_cast<uint8_t>(version) << 4) | (output[6] & 0xF);
+
+  // The variant is DCE 1.1/ISO 11578 (0b10xxxxxx)
+  output[8] &= 0xBF;
+  output[8] |= 0x80;
+}
+
+// Produces the build ID in the configured style and patches it into the
+// build_id section of the output buffer. Does nothing when the section is not
+// needed.
+void Writer::writeBuildId() {
+  BuildIdSection *sec = out.buildIdSec;
+  if (!sec->isNeeded())
+    return;
+
+  // A user-supplied ID is stored as given; nothing has to be computed.
+  if (config->buildId == BuildIdKind::Hexstring) {
+    sec->writeBuildId(config->buildIdVector);
+    return;
+  }
+
+  // The hashed styles cover the whole output file.
+  const size_t idSize = sec->hashSize;
+  llvm::ArrayRef<uint8_t> fileBytes{buffer->getBufferStart(),
+                                    size_t(fileSize)};
+  std::vector<uint8_t> id(idSize);
+
+  switch (config->buildId) {
+  case BuildIdKind::Fast: {
+    // Hash the file down to 8 bytes with xxHash64, then turn that into a
+    // v5 UUID.
+    std::vector<uint8_t> digest(8);
+    computeHash(digest, fileBytes, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
+      support::endian::write64le(dest, xxHash64(arr));
+    });
+    makeUUID(5, digest, id);
+    break;
+  }
+  case BuildIdKind::Sha1:
+    computeHash(id, fileBytes, [idSize](uint8_t *dest, ArrayRef<uint8_t> arr) {
+      memcpy(dest, SHA1::hash(arr).data(), idSize);
+    });
+    break;
+  case BuildIdKind::Uuid:
+    // Random v4 UUID; the file contents play no part.
+    makeUUID(4, {}, id);
+    break;
+  default:
+    llvm_unreachable("unknown BuildIdKind");
+  }
+  sec->writeBuildId(id);
+}
+
 static void setGlobalPtr(DefinedGlobal *g, uint64_t memoryPtr) {
   LLVM_DEBUG(dbgs() << "setGlobalPtr " << g->getName() << " -> " << memoryPtr << "\n");
   g->global->setPointerValue(memoryPtr);
@@ -456,6 +547,7 @@ void Writer::addSections() {
   addSection(out.nameSec);
   addSection(out.producersSec);
   addSection(out.targetFeaturesSec);
+  addSection(out.buildIdSec);
 }
 
 void Writer::finalizeSections() {
@@ -1577,6 +1669,7 @@ void Writer::createSyntheticSections() {
   out.elemSec = make<ElemSection>();
   out.producersSec = make<ProducersSection>();
   out.targetFeaturesSec = make<TargetFeaturesSection>();
+  out.buildIdSec = make<BuildIdSection>();
 }
 
 void Writer::createSyntheticSectionsPostLayout() {
@@ -1738,6 +1831,7 @@ void Writer::run() {
 
   log("-- writeSections");
   writeSections();
+  writeBuildId();
   if (errorCount())
     return;
 


        


More information about the llvm-commits mailing list