[lld] cf918c8 - [lld-macho] Implement -ObjC

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 26 19:26:58 PDT 2020


Author: Jez Ng
Date: 2020-08-26T19:20:55-07:00
New Revision: cf918c809bb0efc43c911e0df079d78a02e85f60

URL: https://github.com/llvm/llvm-project/commit/cf918c809bb0efc43c911e0df079d78a02e85f60
DIFF: https://github.com/llvm/llvm-project/commit/cf918c809bb0efc43c911e0df079d78a02e85f60.diff

LOG: [lld-macho] Implement -ObjC

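-ObjC behaves roughly like -force_load, but filtered: only archive members that define an _OBJC_CLASS_$_ symbol or contain a __DATA,__objc_catlist or __TEXT,__swift section are loaded.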

Differential Revision: https://reviews.llvm.org/D86181

Added: 
    lld/MachO/ObjC.cpp
    lld/MachO/ObjC.h
    lld/test/MachO/objc.s

Modified: 
    lld/MachO/CMakeLists.txt
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 985ad7d8b7df..716449c8574a 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -10,6 +10,7 @@ add_lld_library(lldMachO2
   InputFiles.cpp
   InputSection.cpp
   MergedOutputSection.cpp
+  ObjC.cpp
   OutputSection.cpp
   OutputSegment.cpp
   SymbolTable.cpp

diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 0c6644041dba..c126dbebe76d 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -33,6 +33,7 @@ struct PlatformInfo {
 struct Configuration {
   Symbol *entry;
   bool hasReexports = false;
+  bool forceLoadObjC = false;
   uint32_t headerPad;
   llvm::StringRef installName;
   llvm::StringRef outputFile;

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index d24494f752c5..90aaa219c409 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -10,6 +10,7 @@
 #include "Config.h"
 #include "DriverUtils.h"
 #include "InputFiles.h"
+#include "ObjC.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
@@ -210,6 +211,29 @@ static void getFrameworkSearchPaths(opt::InputArgList &args,
                  {"/Library/Frameworks", "/System/Library/Frameworks"});
 }
 
+// Returns slices of MB by parsing MB as an archive file.
+// Each slice consists of a member file in the archive.
+static std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef mb) {
+  std::unique_ptr<Archive> file =
+      CHECK(Archive::create(mb),
+            mb.getBufferIdentifier() + ": failed to parse archive");
+
+  std::vector<MemoryBufferRef> v;
+  Error err = Error::success();
+  for (const Archive::Child &c : file->children(err)) {
+    MemoryBufferRef mbref =
+        CHECK(c.getMemoryBufferRef(),
+              mb.getBufferIdentifier() +
+                  ": could not get the buffer for a child of the archive");
+    v.push_back(mbref);
+  }
+  if (err)
+    fatal(mb.getBufferIdentifier() +
+          ": Archive::children failed: " + toString(std::move(err)));
+
+  return v;
+}
+
 static void addFile(StringRef path) {
   Optional<MemoryBufferRef> buffer = readFile(path);
   if (!buffer)
@@ -224,6 +248,21 @@ static void addFile(StringRef path) {
     if (!file->isEmpty() && !file->hasSymbolTable())
       error(path + ": archive has no index; run ranlib to add one");
 
+    if (config->forceLoadObjC) {
+      for (const object::Archive::Symbol &sym : file->symbols())
+        if (sym.getName().startswith(objc::klass))
+          symtab->addUndefined(sym.getName());
+
+      // TODO: no need to look for ObjC sections for a given archive member if
+      // we already found that it contains an ObjC symbol. We should also
+      // consider creating a LazyObjFile class in order to avoid double-loading
+      // these files here and below (as part of the ArchiveFile).
+      if (Optional<MemoryBufferRef> buffer = readFile(path))
+        for (MemoryBufferRef member : getArchiveMembers(*buffer))
+          if (hasObjCSection(member))
+            inputFiles.push_back(make<ObjFile>(member));
+    }
+
     inputFiles.push_back(make<ArchiveFile>(std::move(file)));
     break;
   }
@@ -254,29 +293,6 @@ static void addFileList(StringRef path) {
     addFile(path);
 }
 
-// Returns slices of MB by parsing MB as an archive file.
-// Each slice consists of a member file in the archive.
-static std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef mb) {
-  std::unique_ptr<Archive> file =
-      CHECK(Archive::create(mb),
-            mb.getBufferIdentifier() + ": failed to parse archive");
-
-  std::vector<MemoryBufferRef> v;
-  Error err = Error::success();
-  for (const Archive::Child &c : file->children(err)) {
-    MemoryBufferRef mbref =
-        CHECK(c.getMemoryBufferRef(),
-              mb.getBufferIdentifier() +
-                  ": could not get the buffer for a child of the archive");
-    v.push_back(mbref);
-  }
-  if (err)
-    fatal(mb.getBufferIdentifier() +
-          ": Archive::children failed: " + toString(std::move(err)));
-
-  return v;
-}
-
 static void forceLoadArchive(StringRef path) {
   if (Optional<MemoryBufferRef> buffer = readFile(path))
     for (MemoryBufferRef member : getArchiveMembers(*buffer))
@@ -517,6 +533,7 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
 
   getLibrarySearchPaths(args, roots, config->librarySearchPaths);
   getFrameworkSearchPaths(args, roots, config->frameworkSearchPaths);
+  config->forceLoadObjC = args.hasArg(OPT_ObjC);
 
   if (args.hasArg(OPT_v)) {
     message(getLLDVersion());
@@ -571,6 +588,7 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
     case OPT_e:
     case OPT_F:
     case OPT_L:
+    case OPT_ObjC:
     case OPT_headerpad:
     case OPT_install_name:
     case OPT_rpath:

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 0a3e3f6558a7..2fc8cee0bae4 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -47,7 +47,9 @@
 #include "ExportTrie.h"
 #include "InputSection.h"
 #include "MachOStructs.h"
+#include "ObjC.h"
 #include "OutputSection.h"
+#include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "Target.h"
@@ -116,7 +118,7 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
   return None;
 }
 
-static const load_command *findCommand(const mach_header_64 *hdr,
+const load_command *macho::findCommand(const mach_header_64 *hdr,
                                        uint32_t type) {
   const uint8_t *p =
       reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
@@ -137,8 +139,10 @@ void InputFile::parseSections(ArrayRef<section_64> sections) {
   for (const section_64 &sec : sections) {
     InputSection *isec = make<InputSection>();
     isec->file = this;
-    isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16));
-    isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16));
+    isec->name =
+        StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
+    isec->segname =
+        StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
     isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset,
                   static_cast<size_t>(sec.size)};
     if (sec.align >= 32)
@@ -474,14 +478,14 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
     case SymbolKind::ObjectiveCClass:
       // XXX ld64 only creates these symbols when -ObjC is passed in. We may
       // want to emulate that.
-      addSymbol("_OBJC_CLASS_$_" + symbol->getName());
-      addSymbol("_OBJC_METACLASS_$_" + symbol->getName());
+      addSymbol(objc::klass + symbol->getName());
+      addSymbol(objc::metaclass + symbol->getName());
       break;
     case SymbolKind::ObjectiveCClassEHType:
-      addSymbol("_OBJC_EHTYPE_$_" + symbol->getName());
+      addSymbol(objc::ehtype + symbol->getName());
       break;
     case SymbolKind::ObjectiveCInstanceVariable:
-      addSymbol("_OBJC_IVAR_$_" + symbol->getName());
+      addSymbol(objc::ivar + symbol->getName());
       break;
     }
   }

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 048e8877b730..194de0e1a4e9 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -128,6 +128,9 @@ extern std::vector<InputFile *> inputFiles;
 
 llvm::Optional<MemoryBufferRef> readFile(StringRef path);
 
+const llvm::MachO::load_command *
+findCommand(const llvm::MachO::mach_header_64 *, uint32_t type);
+
 } // namespace macho
 
 std::string toString(const macho::InputFile *file);

diff  --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
new file mode 100644
index 000000000000..21691ef5255b
--- /dev/null
+++ b/lld/MachO/ObjC.cpp
@@ -0,0 +1,36 @@
+//===- ObjC.cpp -----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ObjC.h"
+#include "InputFiles.h"
+#include "OutputSegment.h"
+
+#include "llvm/BinaryFormat/MachO.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+using namespace lld;
+
+bool macho::hasObjCSection(MemoryBufferRef mb) {
+  auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
+  if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
+    auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
+    auto sectionHeaders = ArrayRef<section_64>{
+        reinterpret_cast<const section_64 *>(c + 1), c->nsects};
+    for (const section_64 &sec : sectionHeaders) {
+      StringRef sectname(sec.sectname,
+                         strnlen(sec.sectname, sizeof(sec.sectname)));
+      StringRef segname(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
+      if ((segname == segment_names::data && sectname == "__objc_catlist") ||
+          (segname == segment_names::text && sectname == "__swift")) {
+        return true;
+      }
+    }
+  }
+  return false;
+}

diff  --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
new file mode 100644
index 000000000000..8db459ad8e2b
--- /dev/null
+++ b/lld/MachO/ObjC.h
@@ -0,0 +1,31 @@
+//===- ObjC.h ---------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_OBJC_H
+#define LLD_MACHO_OBJC_H
+
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace lld {
+namespace macho {
+
+namespace objc {
+
+constexpr const char klass[] = "_OBJC_CLASS_$_";
+constexpr const char metaclass[] = "_OBJC_METACLASS_$_";
+constexpr const char ehtype[] = "_OBJC_EHTYPE_$_";
+constexpr const char ivar[] = "_OBJC_IVAR_$_";
+
+} // namespace objc
+
+bool hasObjCSection(llvm::MemoryBufferRef);
+
+} // namespace macho
+} // namespace lld
+
+#endif

diff  --git a/lld/test/MachO/objc.s b/lld/test/MachO/objc.s
new file mode 100644
index 000000000000..ff5b21b021e4
--- /dev/null
+++ b/lld/test/MachO/objc.s
@@ -0,0 +1,63 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-objc-symbol.s -o %t/has-objc-symbol.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-objc-category.s -o %t/has-objc-category.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-swift.s -o %t/has-swift.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-objc.s -o %t/no-objc.o
+
+# RUN: rm -f %t/libHasSomeObjC.a
+# RUN: llvm-ar rcs %t/libHasSomeObjC.a %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/no-objc.o
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk -lSystem %t/test.o -o %t/test \
+# RUN:   -L%t -lHasSomeObjC -ObjC
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC
+
+# OBJC:       Sections:
+# OBJC-NEXT:  Idx Name           Size   VMA  Type
+# OBJC-NEXT:    0 __text         {{.*}}      TEXT
+# OBJC-NEXT:    1 __swift        {{.*}}      DATA
+# OBJC-NEXT:    2 __objc_catlist {{.*}}      DATA
+# OBJC-EMPTY:
+# OBJC-NEXT:  SYMBOL TABLE:
+# OBJC-NEXT:  g     F __TEXT,__text _main
+# OBJC-NEXT:  g     F __TEXT,__text _OBJC_CLASS_$_MyObject
+
+# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk -lSystem %t/test.o -o %t/test \
+# RUN:   -L%t -lHasObjCSymbol -lHasObjCCategory -lHasSwift
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=NO-OBJC
+
+# NO-OBJC:       Sections:
+# NO-OBJC-NEXT:  Idx Name           Size   VMA  Type
+# NO-OBJC-NEXT:    0 __text         {{.*}}      TEXT
+# NO-OBJC-EMPTY:
+# NO-OBJC-NEXT:  SYMBOL TABLE:
+# NO-OBJC-NEXT:  g     F __TEXT,__text _main
+# NO-OBJC-EMPTY:
+
+#--- has-objc-symbol.s
+.globl _OBJC_CLASS_$_MyObject
+_OBJC_CLASS_$_MyObject:
+
+#--- has-objc-category.s
+.section	__DATA,__objc_catlist
+.quad 0x1234
+
+#--- has-swift.s
+.section	__TEXT,__swift
+.quad 0x1234
+
+#--- no-objc.s
+## This archive member should not be pulled in since it does not contain any
+## ObjC-related data.
+.globl _foo
+.section __DATA,foo
+
+foo:
+  .quad 0x1234
+
+#--- test.s
+.globl _main
+_main:
+  ret


        


More information about the llvm-commits mailing list