[lld] 001ba65 - [lld-macho] De-templatize mach_header operations

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Mon May 3 15:31:34 PDT 2021


Author: Jez Ng
Date: 2021-05-03T18:31:23-04:00
New Revision: 001ba65375f79a76491677cc2de05637f15a7a57

URL: https://github.com/llvm/llvm-project/commit/001ba65375f79a76491677cc2de05637f15a7a57
DIFF: https://github.com/llvm/llvm-project/commit/001ba65375f79a76491677cc2de05637f15a7a57.diff

LOG: [lld-macho] De-templatize mach_header operations

@thakis pointed out that `mach_header` and `mach_header_64`
actually have the same set of (used) fields, with the 64-bit version
having extra padding. So we can access the fields we need using the
single `mach_header` type instead of using templates to switch between
the two.

I also spotted a potential issue where hasObjCSection tries to parse a
file w/o checking if it does indeed match the target arch... As such,
I've added a quick magic number check to ensure we don't access invalid
memory during `findCommand()`.

Addresses PR50180.

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D101724

Added: 
    

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/ObjC.cpp
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Target.h
    lld/MachO/Writer.cpp
    lld/MachO/Writer.h
    lld/test/MachO/objc.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index dafd914a11bd..e3d266b46596 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1142,11 +1142,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
             "\n>>> referenced from option -exported_symbol(s_list)");
     }
 
-    if (target->wordSize == 8)
-      createSyntheticSections<LP64>();
-    else
-      createSyntheticSections<ILP32>();
-
+    createSyntheticSections();
     createSyntheticSymbols();
 
     for (const Arg *arg : args.filtered(OPT_sectcreate)) {

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index a553754b8efc..5f1f772a2d7a 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -102,13 +102,11 @@ static VersionTuple decodeVersion(uint32_t version) {
   return VersionTuple(major, minor, subMinor);
 }
 
-template <class LP>
 static Optional<PlatformInfo> getPlatformInfo(const InputFile *input) {
   if (!isa<ObjFile>(input) && !isa<DylibFile>(input))
     return None;
 
-  using Header = typename LP::mach_header;
-  auto *hdr = reinterpret_cast<const Header *>(input->mb.getBufferStart());
+  const char *hdr = input->mb.getBufferStart();
 
   PlatformInfo platformInfo;
   if (const auto *cmd =
@@ -141,8 +139,8 @@ static Optional<PlatformInfo> getPlatformInfo(const InputFile *input) {
   return None;
 }
 
-template <class LP> static bool checkCompatibility(const InputFile *input) {
-  Optional<PlatformInfo> platformInfo = getPlatformInfo<LP>(input);
+static bool checkCompatibility(const InputFile *input) {
+  Optional<PlatformInfo> platformInfo = getPlatformInfo(input);
   if (!platformInfo)
     return true;
   // TODO: Correctly detect simulator platforms or relax this check.
@@ -639,7 +637,7 @@ template <class LP> void ObjFile::parse() {
     return;
   }
 
-  if (!checkCompatibility<LP>(this))
+  if (!checkCompatibility(this))
     return;
 
   if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) {
@@ -782,16 +780,8 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
   if (umbrella == nullptr)
     umbrella = this;
 
-  if (target->wordSize == 8)
-    parse<LP64>(umbrella);
-  else
-    parse<ILP32>(umbrella);
-}
-
-template <class LP> void DylibFile::parse(DylibFile *umbrella) {
-  using Header = typename LP::mach_header;
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
-  auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
+  auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
 
   // Initialize dylibName.
   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
@@ -806,7 +796,7 @@ template <class LP> void DylibFile::parse(DylibFile *umbrella) {
     return;
   }
 
-  if (!checkCompatibility<LP>(this))
+  if (!checkCompatibility(this))
     return;
 
   // Initialize symbols.
@@ -825,7 +815,8 @@ template <class LP> void DylibFile::parse(DylibFile *umbrella) {
     return;
   }
 
-  const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) + sizeof(Header);
+  const uint8_t *p =
+      reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
     auto *cmd = reinterpret_cast<const load_command *>(p);
     p += cmd->cmdsize;
@@ -1008,4 +999,3 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
 }
 
 template void ObjFile::parse<LP64>();
-template void DylibFile::parse<LP64>(DylibFile *umbrella);

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 066d4506644c..d57e174f15c1 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -10,6 +10,7 @@
 #define LLD_MACHO_INPUT_FILES_H
 
 #include "MachOStructs.h"
+#include "Target.h"
 
 #include "lld/Common/LLVM.h"
 #include "lld/Common/Memory.h"
@@ -158,9 +159,6 @@ class DylibFile : public InputFile {
   // implemented in the bundle. When used like this, it is very similar
   // to a Dylib, so we re-used the same class to represent it.
   bool isBundleLoader;
-
-private:
-  template <class LP> void parse(DylibFile *umbrella);
 };
 
 // .a file
@@ -189,12 +187,13 @@ extern llvm::SetVector<InputFile *> inputFiles;
 
 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
 
-template <class CommandType = llvm::MachO::load_command, class Header,
-          class... Types>
-const CommandType *findCommand(const Header *hdr, Types... types) {
+// anyHdr should be a pointer to either mach_header or mach_header_64
+template <class CommandType = llvm::MachO::load_command, class... Types>
+const CommandType *findCommand(const void *anyHdr, Types... types) {
   std::initializer_list<uint32_t> typesList{types...};
-  const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) + sizeof(Header);
-
+  const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
+  const uint8_t *p =
+      reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
     auto *cmd = reinterpret_cast<const CommandType *>(p);
     if (llvm::is_contained(typesList, cmd->cmd))

diff  --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 8a748ceef6ab..7ed800827f3e 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -24,6 +24,9 @@ template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
 
   auto *hdr =
       reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
+  if (hdr->magic != LP::magic)
+    return false;
+
   if (const auto *c =
           findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
     auto sectionHeaders =

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index cc595b3860c5..0805f78b4dad 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -71,22 +71,8 @@ void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
   sizeOfCmds += lc->getSize();
 }
 
-// This serves to hide (type-erase) the template parameter from
-// MachHeaderSection.
-template <class LP> class MachHeaderSectionImpl : public MachHeaderSection {
-public:
-  MachHeaderSectionImpl() = default;
-  uint64_t getSize() const override;
-  void writeTo(uint8_t *buf) const override;
-};
-
-template <class LP> MachHeaderSection *macho::makeMachHeaderSection() {
-  return make<MachHeaderSectionImpl<LP>>();
-}
-
-template <class LP> uint64_t MachHeaderSectionImpl<LP>::getSize() const {
-  uint64_t size =
-      sizeof(typename LP::mach_header) + sizeOfCmds + config->headerPad;
+uint64_t MachHeaderSection::getSize() const {
+  uint64_t size = target->headerSize + sizeOfCmds + config->headerPad;
   // If we are emitting an encryptable binary, our load commands must have a
   // separate (non-encrypted) page to themselves.
   if (config->emitEncryptionInfo)
@@ -106,10 +92,9 @@ static uint32_t cpuSubtype() {
   return subtype;
 }
 
-template <class LP>
-void MachHeaderSectionImpl<LP>::writeTo(uint8_t *buf) const {
-  auto *hdr = reinterpret_cast<typename LP::mach_header *>(buf);
-  hdr->magic = LP::magic;
+void MachHeaderSection::writeTo(uint8_t *buf) const {
+  auto *hdr = reinterpret_cast<mach_header *>(buf);
+  hdr->magic = target->magic;
   hdr->cputype = target->cpuType;
   hdr->cpusubtype = cpuSubtype();
   hdr->filetype = config->outputType;
@@ -144,7 +129,7 @@ void MachHeaderSectionImpl<LP>::writeTo(uint8_t *buf) const {
     }
   }
 
-  uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
+  uint8_t *p = reinterpret_cast<uint8_t *>(hdr) + target->headerSize;
   for (const LoadCommand *lc : loadCommands) {
     lc->writeTo(p);
     p += lc->getSize();
@@ -1154,7 +1139,5 @@ void macho::createSyntheticSymbols() {
   addHeaderSymbol("___dso_handle");
 }
 
-template MachHeaderSection *macho::makeMachHeaderSection<LP64>();
-template MachHeaderSection *macho::makeMachHeaderSection<ILP32>();
 template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
 template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 38fee8681f81..1da7dc2a1cfe 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -80,17 +80,18 @@ class LinkEditSection : public SyntheticSection {
 // The header of the Mach-O file, which must have a file offset of zero.
 class MachHeaderSection : public SyntheticSection {
 public:
-  void addLoadCommand(LoadCommand *);
+  MachHeaderSection();
   bool isHidden() const override { return true; }
+  uint64_t getSize() const override;
+  void writeTo(uint8_t *buf) const override;
+
+  void addLoadCommand(LoadCommand *);
 
 protected:
-  MachHeaderSection();
   std::vector<LoadCommand *> loadCommands;
   uint32_t sizeOfCmds = 0;
 };
 
-template <class LP> MachHeaderSection *makeMachHeaderSection();
-
 // A hidden section that exists solely for the purpose of creating the
 // __PAGEZERO segment, which is used to catch null pointer dereferences.
 class PageZeroSection : public SyntheticSection {

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 4084db6403a5..eb34c4575132 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -32,7 +32,9 @@ class TargetInfo {
   template <class LP> TargetInfo(LP) {
     // Having these values available in TargetInfo allows us to access them
     // without having to resort to templates.
+    magic = LP::magic;
     pageZeroSize = LP::pageZeroSize;
+    headerSize = sizeof(typename LP::mach_header);
     wordSize = LP::wordSize;
   }
 
@@ -67,10 +69,12 @@ class TargetInfo {
     return getRelocAttrs(type).hasAttr(bit);
   }
 
+  uint32_t magic;
   uint32_t cpuType;
   uint32_t cpuSubtype;
 
   uint64_t pageZeroSize;
+  size_t headerSize;
   size_t stubSize;
   size_t stubHelperHeaderSize;
   size_t stubHelperEntrySize;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index ca46eecec162..5c4465e2a24e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1063,8 +1063,8 @@ template <class LP> void Writer::run() {
 
 template <class LP> void macho::writeResult() { Writer().run<LP>(); }
 
-template <class LP> void macho::createSyntheticSections() {
-  in.header = makeMachHeaderSection<LP>();
+void macho::createSyntheticSections() {
+  in.header = make<MachHeaderSection>();
   in.rebase = make<RebaseSection>();
   in.binding = make<BindingSection>();
   in.weakBinding = make<WeakBindingSection>();
@@ -1083,5 +1083,3 @@ OutputSection *macho::firstTLVDataSection = nullptr;
 
 template void macho::writeResult<LP64>();
 template void macho::writeResult<ILP32>();
-template void macho::createSyntheticSections<LP64>();
-template void macho::createSyntheticSections<ILP32>();

diff  --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h
index 7369a6d3990a..56f6f7ae6fe7 100644
--- a/lld/MachO/Writer.h
+++ b/lld/MachO/Writer.h
@@ -27,7 +27,7 @@ class LoadCommand {
 
 template <class LP> void writeResult();
 
-template <class LP> void createSyntheticSections();
+void createSyntheticSections();
 
 // Add bindings for symbols that need weak or non-lazy bindings.
 void addNonLazyBindingEntries(const Symbol *, const InputSection *,

diff  --git a/lld/test/MachO/objc.s b/lld/test/MachO/objc.s
index d72749717af7..39edf047502d 100644
--- a/lld/test/MachO/objc.s
+++ b/lld/test/MachO/objc.s
@@ -5,11 +5,12 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-objc-category.s -o %t/has-objc-category.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-swift.s -o %t/has-swift.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-objc.s -o %t/no-objc.o
-# RUN: llvm-ar rcs %t/libHasSomeObjC.a %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/no-objc.o
+## Make sure we don't mis-parse a 32-bit file as 64-bit
+# RUN: llvm-mc -filetype=obj -triple=armv7-apple-watchos %t/no-objc.s -o %t/wrong-arch.o
+# RUN: llvm-ar rcs %t/libHasSomeObjC.a %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/no-objc.o %t/wrong-arch.o
 
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
-# RUN: %lld -lSystem %t/test.o -o %t/test \
-# RUN:   -L%t -lHasSomeObjC -ObjC
+# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC -ObjC
 # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC
 
 # OBJC:       Sections:
@@ -22,8 +23,7 @@
 # OBJC-NEXT:  g     F __TEXT,__text _main
 # OBJC-NEXT:  g     F __TEXT,__text _OBJC_CLASS_$_MyObject
 
-# RUN: %lld -lSystem %t/test.o -o %t/test \
-# RUN:   -L%t -lHasSomeObjC
+# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC
 # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=NO-OBJC
 
 # NO-OBJC:       Sections:
@@ -56,6 +56,9 @@ _OBJC_CLASS_$_MyObject:
 foo:
   .quad 0x1234
 
+.section __DATA,bar
+.section __DATA,baz
+
 #--- test.s
 .globl _main
 _main:


        


More information about the llvm-commits mailing list