[lld] c7af9ae - Reapply: [WebAssembly] Implement build-id feature
Derek Schuff via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 3 17:19:20 PST 2023
Author: Derek Schuff
Date: 2023-03-03T17:15:55-08:00
New Revision: c7af9ae577bb04c5fe120fc07844a500818c8f47
URL: https://github.com/llvm/llvm-project/commit/c7af9ae577bb04c5fe120fc07844a500818c8f47
DIFF: https://github.com/llvm/llvm-project/commit/c7af9ae577bb04c5fe120fc07844a500818c8f47.diff
LOG: Reapply: [WebAssembly] Implement build-id feature
Implement the --build-id flag similarly to ELF, and generate a
build_id section according to the WebAssembly tool convention
specified in https://github.com/WebAssembly/tool-conventions/pull/183
The default style ("fast") hashes the contents of the output and
(unlike ELF) generates a v5 UUID from that hash, using a hardcoded,
randomly generated namespace UUID. It also supports generating a
random v4 UUID ("uuid"), a sha1 hash ("sha1", with "tree" accepted as
a synonym), and a user-specified hex string (as ELF does).
Differential Revision: https://reviews.llvm.org/D107662
Fix the MSVC build by calling std::copy on the underlying buffer rather
than copying directly from std::array to llvm::MutableArrayRef.
Added:
lld/test/wasm/build-id.test
Modified:
lld/wasm/Config.h
lld/wasm/Driver.cpp
lld/wasm/Options.td
lld/wasm/SyntheticSections.cpp
lld/wasm/SyntheticSections.h
lld/wasm/Writer.cpp
Removed:
################################################################################
diff --git a/lld/test/wasm/build-id.test b/lld/test/wasm/build-id.test
new file mode 100644
index 0000000000000..a15f4cedcdddd
--- /dev/null
+++ b/lld/test/wasm/build-id.test
@@ -0,0 +1,60 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32 %p/Inputs/start.s -o %t
+
+# RUN: wasm-ld --build-id %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: wasm-ld --build-id=fast %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: wasm-ld --build-id %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+
+# RUN: wasm-ld --build-id=sha1 %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+# RUN: wasm-ld --build-id=sha1 %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# RUN: wasm-ld --build-id=tree %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+# RUN: wasm-ld --build-id=tree %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# RUN: wasm-ld --build-id=uuid %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=UUID %s
+
+# RUN: wasm-ld --build-id=0x12345678 %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=HEX %s
+
+# RUN: wasm-ld %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+
+# RUN: wasm-ld --build-id=sha1 --build-id=none %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+# RUN: wasm-ld --build-id --build-id=none %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+# RUN: wasm-ld --build-id=none --build-id %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+
+.section .data.foo,"",@
+ .globl foo
+ .hidden foo
+ .p2align 2
+foo:
+ .int32 1
+ .size foo, 4
+
+
+# DEFAULT: Contents of section build_id:
+# DEFAULT-NEXT: 0000 10cdbf99 f76b1f5e ebb2f36a 1bde1d6c .....k.^...j...l
+# DEFAULT-NEXT: 0010 01
+
+# SHA1: Contents of section build_id:
+# SHA1-NEXT: 0000 14ad22e8 54d72438 94af85de 3c5592bd ..".T.$8....<U..
+# SHA1-NEXT: 0010 1b5ec96f 6b .^.ok
+
+# UUID: Contents of section build_id:
+# UUID-NEXT: 0000 10
+
+# HEX: Contents of section build_id:
+# HEX-NEXT: 0000 04123456 78 ..4Vx
+
+
+# NONE-NOT: Contents of section build_id:
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 87217dddcb1eb..ea19a21172bb1 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -26,6 +26,9 @@ namespace wasm {
// For --unresolved-symbols.
enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
+// For --build-id.
+enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid };
+
// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
@@ -72,6 +75,7 @@ struct Configuration {
llvm::StringRef thinLTOJobs;
bool ltoDebugPassManager;
UnresolvedPolicy unresolvedSymbols;
+ BuildIdKind buildId = BuildIdKind::None;
llvm::StringRef entry;
llvm::StringRef mapFile;
@@ -85,6 +89,7 @@ struct Configuration {
llvm::CachePruningPolicy thinLTOCachePolicy;
std::optional<std::vector<std::string>> features;
std::optional<std::vector<std::string>> extraFeatures;
+ llvm::SmallVector<uint8_t, 0> buildIdVector;
// The following config options do not directly correspond to any
// particular command line options.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 709fd9923b531..ea9299681d87f 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -385,6 +385,33 @@ static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &args) {
return errorOrWarn;
}
+// Parse --build-id or --build-id=<style>. We handle "tree" as a
+// synonym for "sha1" because all our hash functions including
+// -build-id=sha1 are actually tree hashes for performance reasons.
+static std::pair<BuildIdKind, SmallVector<uint8_t, 0>>
+getBuildId(opt::InputArgList &args) {
+ auto *arg = args.getLastArg(OPT_build_id, OPT_build_id_eq);
+ if (!arg)
+ return {BuildIdKind::None, {}};
+
+ if (arg->getOption().getID() == OPT_build_id)
+ return {BuildIdKind::Fast, {}};
+
+ StringRef s = arg->getValue();
+ if (s == "fast")
+ return {BuildIdKind::Fast, {}};
+ if (s == "sha1" || s == "tree")
+ return {BuildIdKind::Sha1, {}};
+ if (s == "uuid")
+ return {BuildIdKind::Uuid, {}};
+ if (s.startswith("0x"))
+ return {BuildIdKind::Hexstring, parseHex(s.substr(2))};
+
+ if (s != "none")
+ error("unknown --build-id style: " + s);
+ return {BuildIdKind::None, {}};
+}
+
// Initializes Config members by the command line options.
static void readConfigs(opt::InputArgList &args) {
config->bsymbolic = args.hasArg(OPT_Bsymbolic);
@@ -519,6 +546,8 @@ static void readConfigs(opt::InputArgList &args) {
if (args.hasArg(OPT_print_map))
config->mapFile = "-";
+
+ std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
}
// Some Config members do not directly correspond to any particular
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index b30ae99e6eb0f..5fe1e7e5c55e7 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -42,6 +42,11 @@ def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">
def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
+def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;
+
+def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">,
+ MetaVarName<"[fast,sha1,uuid,0x<hexstring>]">;
+
defm color_diagnostics: B<"color-diagnostics",
"Alias for --color-diagnostics=always",
"Alias for --color-diagnostics=never">;
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 5808ebb8da3d0..85d1a99e88b5a 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -888,5 +888,39 @@ void RelocSection::writeBody() {
sec->writeRelocations(bodyOutputStream);
}
+static size_t getHashSize() {
+ switch (config->buildId) {
+ case BuildIdKind::Fast:
+ case BuildIdKind::Uuid:
+ return 16;
+ case BuildIdKind::Sha1:
+ return 20;
+ case BuildIdKind::Hexstring:
+ return config->buildIdVector.size();
+ case BuildIdKind::None:
+ return 0;
+ }
+}
+
+BuildIdSection::BuildIdSection()
+ : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, buildIdSectionName),
+ hashSize(getHashSize()) {}
+
+void BuildIdSection::writeBody() {
+ LLVM_DEBUG(llvm::dbgs() << "BuildId writebody\n");
+ // Write hash size
+ auto &os = bodyOutputStream;
+ writeUleb128(os, hashSize, "build id size");
+ writeBytes(os, std::vector<char>(hashSize, ' ').data(), hashSize,
+ "placeholder");
+}
+
+void BuildIdSection::writeBuildId(llvm::ArrayRef<uint8_t> buf) {
+ assert(buf.size() == hashSize);
+ LLVM_DEBUG(dbgs() << "buildid write " << buf.size() << " "
+ << hashPlaceholderPtr << '\n');
+ memcpy(hashPlaceholderPtr, buf.data(), hashSize);
+}
+
} // namespace wasm
} // namespace lld
diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index bda3f8eacd819..f4e990429bb9c 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -429,6 +429,35 @@ class RelocSection : public SyntheticSection {
OutputSection *sec;
};
+class BuildIdSection : public SyntheticSection {
+public:
+ BuildIdSection();
+ void writeBody() override;
+ bool isNeeded() const override {
+ return config->buildId != BuildIdKind::None;
+ }
+ void writeBuildId(llvm::ArrayRef<uint8_t> buf);
+ void writeTo(uint8_t *buf) override {
+ LLVM_DEBUG(llvm::dbgs()
+ << "BuildId writeto buf " << buf << " offset " << offset
+ << " headersize " << header.size() << '\n');
+ // The actual build ID is derived from a hash of all of the output
+ // sections, so it can't be calculated until they are written. Here
+ // we write the section leaving zeros in place of the hash.
+ SyntheticSection::writeTo(buf);
+ // Calculate and store the location where the hash will be written.
+ hashPlaceholderPtr = buf + offset + header.size() +
+ +sizeof(buildIdSectionName) /*name string*/ +
+ 1 /* hash size */;
+ }
+
+ const uint32_t hashSize;
+
+private:
+ static constexpr char buildIdSectionName[] = "build_id";
+ uint8_t *hashPlaceholderPtr = nullptr;
+};
+
// Linker generated output sections
struct OutStruct {
DylinkSection *dylinkSec;
@@ -447,6 +476,7 @@ struct OutStruct {
NameSection *nameSec;
ProducersSection *producersSec;
TargetFeaturesSection *targetFeaturesSec;
+ BuildIdSection *buildIdSec;
};
extern OutStruct out;
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 339448af6da24..e82d70c9678d9 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -17,8 +17,10 @@
#include "SymbolTable.h"
#include "SyntheticSections.h"
#include "WriterUtils.h"
+#include "lld/Common/Arrays.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/Strings.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +32,9 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/xxhash.h"
#include <cstdarg>
#include <map>
@@ -103,6 +108,7 @@ class Writer {
void writeHeader();
void writeSections();
+ void writeBuildId();
uint64_t fileSize = 0;
@@ -219,6 +225,91 @@ void Writer::writeSections() {
});
}
+// Computes a hash value of Data using a given hash function.
+// In order to utilize multiple cores, we first split data into 1MB
+// chunks, compute a hash for each chunk, and then compute a hash value
+// of the hash values.
+
+static void
+computeHash(llvm::MutableArrayRef<uint8_t> hashBuf,
+ llvm::ArrayRef<uint8_t> data,
+ std::function<void(uint8_t *dest, ArrayRef<uint8_t> arr)> hashFn) {
+ std::vector<ArrayRef<uint8_t>> chunks = split(data, 1024 * 1024);
+ std::vector<uint8_t> hashes(chunks.size() * hashBuf.size());
+
+ // Compute hash values.
+ parallelFor(0, chunks.size(), [&](size_t i) {
+ hashFn(hashes.data() + i * hashBuf.size(), chunks[i]);
+ });
+
+ // Write to the final output buffer.
+ hashFn(hashBuf.data(), hashes);
+}
+
+static void makeUUID(unsigned version, llvm::ArrayRef<uint8_t> fileHash,
+ llvm::MutableArrayRef<uint8_t> output) {
+ assert(version == 4 || version == 5 && "Unknown UUID version");
+ assert(output.size() == 16 && "Wrong size for UUID output");
+ if (version == 5) {
+ // Build a valid v5 UUID from a hardcoded (randomly-generated) namespace
+ // UUID, and the computed hash of the output.
+ std::array<uint8_t, 16> namespaceUUID{0xA1, 0xFA, 0x48, 0x2D, 0x0E, 0x22,
+ 0x03, 0x8D, 0x33, 0x8B, 0x52, 0x1C,
+ 0xD6, 0xD2, 0x12, 0xB2};
+ SHA1 sha;
+ sha.update(namespaceUUID);
+ sha.update(fileHash);
+ auto s = sha.final();
+ std::copy(s.data(), &s.data()[output.size()], output.data());
+ } else if (version == 4) {
+ if (auto ec = llvm::getRandomBytes(output.data(), output.size()))
+ error("entropy source failure: " + ec.message());
+ }
+ // Set the UUID version and variant fields.
+ // The version is the upper nibble of byte 6 (0b0101xxxx or 0b0100xxxx)
+ output[6] = (static_cast<uint8_t>(version) << 4) | (output[6] & 0xF);
+
+ // The variant is DCE 1.1/ISO 11578 (0b10xxxxxx)
+ output[8] &= 0xBF;
+ output[8] |= 0x80;
+}
+
+void Writer::writeBuildId() {
+ if (!out.buildIdSec->isNeeded())
+ return;
+ if (config->buildId == BuildIdKind::Hexstring) {
+ out.buildIdSec->writeBuildId(config->buildIdVector);
+ return;
+ }
+
+ // Compute a hash of all sections of the output file.
+ size_t hashSize = out.buildIdSec->hashSize;
+ std::vector<uint8_t> buildId(hashSize);
+ llvm::ArrayRef<uint8_t> buf{buffer->getBufferStart(), size_t(fileSize)};
+
+ switch (config->buildId) {
+ case BuildIdKind::Fast: {
+ std::vector<uint8_t> fileHash(8);
+ computeHash(fileHash, buf, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
+ support::endian::write64le(dest, xxHash64(arr));
+ });
+ makeUUID(5, fileHash, buildId);
+ break;
+ }
+ case BuildIdKind::Sha1:
+ computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
+ memcpy(dest, SHA1::hash(arr).data(), hashSize);
+ });
+ break;
+ case BuildIdKind::Uuid:
+ makeUUID(4, {}, buildId);
+ break;
+ default:
+ llvm_unreachable("unknown BuildIdKind");
+ }
+ out.buildIdSec->writeBuildId(buildId);
+}
+
static void setGlobalPtr(DefinedGlobal *g, uint64_t memoryPtr) {
LLVM_DEBUG(dbgs() << "setGlobalPtr " << g->getName() << " -> " << memoryPtr << "\n");
g->global->setPointerValue(memoryPtr);
@@ -456,6 +547,7 @@ void Writer::addSections() {
addSection(out.nameSec);
addSection(out.producersSec);
addSection(out.targetFeaturesSec);
+ addSection(out.buildIdSec);
}
void Writer::finalizeSections() {
@@ -1577,6 +1669,7 @@ void Writer::createSyntheticSections() {
out.elemSec = make<ElemSection>();
out.producersSec = make<ProducersSection>();
out.targetFeaturesSec = make<TargetFeaturesSection>();
+ out.buildIdSec = make<BuildIdSection>();
}
void Writer::createSyntheticSectionsPostLayout() {
@@ -1738,6 +1831,7 @@ void Writer::run() {
log("-- writeSections");
writeSections();
+ writeBuildId();
if (errorCount())
return;
More information about the llvm-commits
mailing list