[lld] b869081 - [lld-macho] Implement LC_UUID

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 10 12:19:43 PST 2020


Author: Jez Ng
Date: 2020-11-10T12:19:28-08:00
New Revision: b86908171ea81a57f11fb29960328f57aaa9e7f9

URL: https://github.com/llvm/llvm-project/commit/b86908171ea81a57f11fb29960328f57aaa9e7f9
DIFF: https://github.com/llvm/llvm-project/commit/b86908171ea81a57f11fb29960328f57aaa9e7f9.diff

LOG: [lld-macho] Implement LC_UUID

Apple devtools use this to locate the dSYM files for a given
binary.

The UUID is computed based on an MD5 hash of the binary's contents. In order to
hash the contents, we must first write them, but LC_UUID itself must be part of
the written contents in order for all the offsets to be calculated correctly.
We resolve this circular paradox by first writing an LC_UUID with an all-zero
UUID, then updating the UUID with its real value later.

I'm not sure there's a good way to test that the value of the UUID is
"as expected", so I've just checked that it's present.

Reviewed By: #lld-macho, compnerd, smeenai

Differential Revision: https://reviews.llvm.org/D89418

Added: 
    

Modified: 
    lld/MachO/Writer.cpp
    lld/test/MachO/headerpad.s
    lld/test/MachO/load-commands.s
    lld/test/MachO/local-got.s
    lld/test/MachO/relocations.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c47b878b6c59..c5239469ff2d 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -24,6 +24,7 @@
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 
@@ -35,9 +36,7 @@ using namespace lld;
 using namespace lld::macho;
 
 namespace {
-class LCLinkEdit;
-class LCDyldInfo;
-class LCSymtab;
+class LCUuid;
 
 class Writer {
 public:
@@ -51,6 +50,7 @@ class Writer {
 
   void openFile();
   void writeSections();
+  void writeUuid();
 
   void run();
 
@@ -62,6 +62,7 @@ class Writer {
   SymtabSection *symtabSection = nullptr;
   IndirectSymtabSection *indirectSymtabSection = nullptr;
   UnwindInfoSection *unwindInfoSection = nullptr;
+  LCUuid *uuidCommand = nullptr;
 };
 
 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -341,6 +342,30 @@ class LCBuildVersion : public LoadCommand {
   const PlatformInfo &platform;
 };
 
+// Stores a unique identifier for the output file based on an MD5 hash of its
+// contents. Hashing requires the contents to be written first, yet LC_UUID
+// must itself be part of them for all offsets to be calculated correctly. To
+// break the cycle, writeTo() emits only the command header and remembers where
+// the UUID bytes live (they are presumably still zero in the fresh output
+// buffer); writeUuid() fills in the real value once the hash is known.
+class LCUuid : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(uuid_command); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<uuid_command *>(buf);
+    c->cmd = LC_UUID;
+    c->cmdsize = getSize();
+    uuidBuf = c->uuid; // remember where writeUuid() must store the bytes
+  }
+
+  void writeUuid(const std::array<uint8_t, 16> &uuid) const {
+    memcpy(uuidBuf, uuid.data(), uuid.size());
+  }
+
+  mutable uint8_t *uuidBuf; // assigned in writeTo(), which is const
+};
+
 } // namespace
 
 void Writer::scanRelocations() {
@@ -391,6 +416,9 @@ void Writer::createLoadCommands() {
 
   in.header->addLoadCommand(make<LCBuildVersion>(config->platform));
 
+  uuidCommand = make<LCUuid>();
+  in.header->addLoadCommand(uuidCommand);
+
   uint8_t segIndex = 0;
   for (OutputSegment *seg : outputSegments) {
     in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
@@ -618,6 +646,21 @@ void Writer::writeSections() {
       osec->writeTo(buf + osec->fileOff);
 }
 
+void Writer::writeUuid() {
+  MD5 hash;
+  const auto *bufStart = reinterpret_cast<char *>(buffer->getBufferStart());
+  const auto *bufEnd = reinterpret_cast<char *>(buffer->getBufferEnd());
+  hash.update(StringRef(bufStart, bufEnd - bufStart)); // whole output buffer
+  MD5::MD5Result result;
+  hash.final(result);
+  // Conform to the RFC 4122 layout for a version 3 (MD5-based) UUID:
+  // 1. Set the version field to 3 to indicate that this is an MD5-based UUID.
+  result.Bytes[6] = (result.Bytes[6] & 0xf) | 0x30;
+  // 2. Set the two MSBs of uuid_t::clock_seq_hi_and_reserved to zero and one.
+  result.Bytes[8] = (result.Bytes[8] & 0x3f) | 0x80;
+  uuidCommand->writeUuid(result.Bytes);
+}
+
 void Writer::run() {
   // dyld requires __LINKEDIT segment to always exist (even if empty).
   OutputSegment *linkEditSegment =
@@ -668,6 +711,7 @@ void Writer::run() {
     return;
 
   writeSections();
+  writeUuid();
 
   if (auto e = buffer->commit())
     error("failed to write to the output file: " + toString(std::move(e)));

diff  --git a/lld/test/MachO/headerpad.s b/lld/test/MachO/headerpad.s
index 7abc7f23e7c0..0f4f19ce9d62 100644
--- a/lld/test/MachO/headerpad.s
+++ b/lld/test/MachO/headerpad.s
@@ -13,8 +13,8 @@
 # RUN: %lld -o %t %t.o
 # RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PADx
 #
-# PADx:      magic        {{.+}}  ncmds sizeofcmds        flags
-# PADx-NEXT: MH_MAGIC_64  {{.+}}  9     [[#%u, CMDSIZE:]] {{.*}}
+# PADx:      magic        {{.+}}  ncmds  sizeofcmds         flags
+# PADx-NEXT: MH_MAGIC_64  {{.+}}  [[#]]  [[#%u, CMDSIZE:]]  {{.*}}
 # PADx:      sectname __text
 # PADx-NEXT: segname __TEXT
 # PADx-NEXT: addr
@@ -27,8 +27,8 @@
 # RUN: %lld -o %t %t.o -headerpad 0 -headerpad_max_install_names
 # RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD0
 #
-# PAD0:      magic        {{.+}}  ncmds sizeofcmds        flags
-# PAD0-NEXT: MH_MAGIC_64  {{.+}}  9     [[#%u, CMDSIZE:]] {{.*}}
+# PAD0:      magic        {{.+}}  ncmds  sizeofcmds         flags
+# PAD0-NEXT: MH_MAGIC_64  {{.+}}  [[#]]  [[#%u, CMDSIZE:]]  {{.*}}
 # PAD0:      sectname __text
 # PAD0-NEXT: segname __TEXT
 # PAD0-NEXT: addr
@@ -43,8 +43,8 @@
 # RUN: %lld -o %t %t.o -headerpad 0X11 -headerpad_max_install_names
 # RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=PAD11
 #
-# PAD11:      magic        {{.+}}  ncmds sizeofcmds        flags
-# PAD11-NEXT: MH_MAGIC_64  {{.+}}  9     [[#%u, CMDSIZE:]] {{.*}}
+# PAD11:      magic        {{.+}}  ncmds  sizeofcmds         flags
+# PAD11-NEXT: MH_MAGIC_64  {{.+}}  [[#]]  [[#%u, CMDSIZE:]]  {{.*}}
 # PAD11:      sectname __text
 # PAD11-NEXT: segname __TEXT
 # PAD11-NEXT: addr
@@ -70,7 +70,7 @@
 # PADMAX-NEXT: segname __TEXT
 # PADMAX-NEXT: addr
 # PADMAX-NEXT: size
-# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 6)]]
+# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 7)]]
 
 ################ All 3 kinds of LCDylib swamped by a larger override
 # RUN: %lld -o %T/libnull.dylib %T/null.o -dylib \

diff  --git a/lld/test/MachO/load-commands.s b/lld/test/MachO/load-commands.s
index 5e29cecc2990..ec37d98408ef 100644
--- a/lld/test/MachO/load-commands.s
+++ b/lld/test/MachO/load-commands.s
@@ -9,6 +9,7 @@
 # COMMON-DAG: cmd LC_DYLD_INFO_ONLY
 # COMMON-DAG: cmd LC_SYMTAB
 # COMMON-DAG: cmd LC_DYSYMTAB
+# COMMON-DAG: cmd LC_UUID
 
 ## Check for the presence of load commands that are essential for a working
 ## executable. Also check that it has the right filetype.

diff  --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s
index 60fedb1beca6..e4a1c5e9b979 100644
--- a/lld/test/MachO/local-got.s
+++ b/lld/test/MachO/local-got.s
@@ -12,12 +12,12 @@
 ## address offset and the contents at that address very similarly, so am using
 ## --match-full-lines to make sure we match on the right thing.
 # CHECK:      Contents of section __TEXT,__cstring:
-# CHECK-NEXT: 10000040c {{.*}}
+# CHECK-NEXT: 100000424 {{.*}}
 
 ## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
 ## start of __cstring
 # CHECK:      Contents of section __DATA_CONST,__got:
-# CHECK-NEXT: [[#%X,ADDR:]]  1a040000 01000000 0c040000 01000000 {{.*}}
+# CHECK-NEXT: [[#%X,ADDR:]]  32040000 01000000 24040000 01000000 {{.*}}
 # CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
 
 ## Check that the rebase table is empty.

diff  --git a/lld/test/MachO/relocations.s b/lld/test/MachO/relocations.s
index 7ed365806e9b..c5627c49583f 100644
--- a/lld/test/MachO/relocations.s
+++ b/lld/test/MachO/relocations.s
@@ -21,7 +21,7 @@
 
 # RUN: llvm-objdump --section=__const --full-contents %t | FileCheck %s --check-prefix=NONPCREL
 # NONPCREL:      Contents of section __DATA,__const:
-# NONPCREL-NEXT: 100001000 f0030000 01000000 f0030000 01000000
+# NONPCREL-NEXT: 100001000 08040000 01000000 08040000 01000000
 
 .section __TEXT,__text
 .globl _main, _f


        


More information about the llvm-commits mailing list