[lld] d23da0e - [lld-macho] Fold __objc_imageinfo sections

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 23 09:12:14 PDT 2022


Author: Jez Ng
Date: 2022-07-23T12:12:01-04:00
New Revision: d23da0ec6c539fa380b7552a99e6922efe7a55e8

URL: https://github.com/llvm/llvm-project/commit/d23da0ec6c539fa380b7552a99e6922efe7a55e8
DIFF: https://github.com/llvm/llvm-project/commit/d23da0ec6c539fa380b7552a99e6922efe7a55e8.diff

LOG: [lld-macho] Fold __objc_imageinfo sections

Previously, we treated it as a regular ConcatInputSection. However, ld64
actually parses its contents and uses that to synthesize a single image
info struct, generating one 8-byte section instead of `8 * number of
object files with ObjC code`.

I'm not entirely sure what impact this section has on the runtime, so I
just tried to follow ld64's semantics as closely as possible in this
diff. My main motivation though was to reduce binary size.

No significant perf change on chromium_framework on my 16-core Mac Pro:

             base           diff           difference (95% CI)
  sys_time   1.764 ± 0.062  1.748 ± 0.032  [  -2.4% ..   +0.5%]
  user_time  5.112 ± 0.104  5.106 ± 0.046  [  -0.9% ..   +0.7%]
  wall_time  6.111 ± 0.184  6.085 ± 0.076  [  -1.6% ..   +0.8%]
  samples    30             32

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D130125

Added: 
    lld/test/MachO/objc-imageinfo.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/InputSection.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp
    lld/test/MachO/builtin-rename.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 8be29eb5b9da1..454708fad4ef2 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -586,7 +586,7 @@ static void initializeSectionRenameMap() {
                              section_names::objcCatList,
                              section_names::objcNonLazyCatList,
                              section_names::objcProtoList,
-                             section_names::objcImageInfo};
+                             section_names::objCImageInfo};
     for (StringRef s : v)
       config->sectionRenameMap[{segment_names::data, s}] = {
           segment_names::dataConst, s};
@@ -1102,6 +1102,8 @@ static void gatherInputSections() {
         }
       }
     }
+    if (!file->objCImageInfo.empty())
+      in.objCImageInfo->addFile(file);
   }
   assert(inputOrder <= UnspecifiedInputOrder);
 }

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7a0b4628fe1be..e3bf553e53347 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -363,6 +363,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
       // have the same name without causing duplicate symbol errors. To avoid
       // spurious duplicate symbol errors, we do not parse these sections.
       // TODO: Evaluate whether the bitcode metadata is needed.
+    } else if (name == section_names::objCImageInfo &&
+               segname == segment_names::data) {
+      objCImageInfo = data;
     } else {
       if (name == section_names::addrSig)
         addrSigSection = sections.back();

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index efddc1c467822..5deb05272a6b1 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -120,6 +120,7 @@ class InputFile {
 
   std::vector<Symbol *> symbols;
   std::vector<Section *> sections;
+  ArrayRef<uint8_t> objCImageInfo;
 
   // If not empty, this stores the name of the archive containing this file.
   // We use this string for creating error messages.

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 2b98adce9fa02..afe76c56b5362 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -321,7 +321,7 @@ constexpr const char objcCatList[] = "__objc_catlist";
 constexpr const char objcClassList[] = "__objc_classlist";
 constexpr const char objcClassRefs[] = "__objc_classrefs";
 constexpr const char objcConst[] = "__objc_const";
-constexpr const char objcImageInfo[] = "__objc_imageinfo";
+constexpr const char objCImageInfo[] = "__objc_imageinfo";
 constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
 constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
 constexpr const char objcProtoList[] = "__objc_protolist";

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index e032ba124b7a3..36677b58398a6 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1621,6 +1621,86 @@ void WordLiteralSection::writeTo(uint8_t *buf) const {
     memcpy(buf + p.second * 4, &p.first, 4);
 }
 
+ObjCImageInfoSection::ObjCImageInfoSection() // section "__objc_imageinfo"
+    : SyntheticSection(segment_names::data, section_names::objCImageInfo) {}
+
+ObjCImageInfoSection::ImageInfo
+ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
+  ImageInfo info;
+  ArrayRef<uint8_t> data = file->objCImageInfo;
+  // Decode `file`'s raw __objc_imageinfo bytes. Expected layout:
+  //   struct { uint32_t version; uint32_t flags; };
+  // with both words read as little-endian. If the section is shorter than 8
+  // bytes or the version word is nonzero, we warn and return the
+  // default-constructed `info` without looking at the flags.
+  if (data.size() < 8) {
+    warn(toString(file) + ": invalid __objc_imageinfo size");
+    return info;
+  }
+
+  auto *buf = reinterpret_cast<const uint32_t *>(data.data()); // [0]=version, [1]=flags
+  if (read32le(buf) != 0) { // only version 0 is accepted
+    warn(toString(file) + ": invalid __objc_imageinfo version");
+    return info;
+  }
+
+  uint32_t flags = read32le(buf + 1);
+  info.swiftVersion = (flags >> 8) & 0xff; // bits 8-15 of flags
+  info.hasCategoryClassProperties = flags & 0x40; // bit 6 of flags
+  return info;
+}
+
+static std::string swiftVersionString(uint8_t version) { // used in diagnostics
+  switch (version) { // encoded version byte -> human-readable name
+    case 1:
+      return "1.0";
+    case 2:
+      return "1.1";
+    case 3:
+      return "2.0";
+    case 4:
+      return "3.0";
+    case 5:
+      return "4.0";
+    default: // no name known for this value: print it raw as 0x<hex>
+      return ("0x" + Twine::utohexstr(version)).str();
+  }
+}
+
+// Validate each object file's __objc_imageinfo and merge them into the image
+// info for the output binary. Only two pieces of info are carried over:
+// 1. The Swift version (0 = unset; all nonzero values must agree, else error)
+// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
+void ObjCImageInfoSection::finalizeContents() {
+  assert(files.size() != 0); // should have already been checked via isNeeded()
+
+  info.hasCategoryClassProperties = true; // start true; AND-ed with each input
+  const InputFile *firstFile; // latest file that set/matched info.swiftVersion
+  for (auto file : files) {
+    ImageInfo inputInfo = parseImageInfo(file);
+    info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;
+
+    if (inputInfo.swiftVersion != 0) { // 0 places no constraint on the output
+      if (info.swiftVersion != 0 &&
+          info.swiftVersion != inputInfo.swiftVersion) {
+        error("Swift version mismatch: " + toString(firstFile) +
+              " has version " + swiftVersionString(info.swiftVersion) +
+              " but " + toString(file) + " has version " +
+              swiftVersionString(inputInfo.swiftVersion));
+      } else { // no version recorded yet, or this file agrees with it
+        info.swiftVersion = inputInfo.swiftVersion;
+        firstFile = file; // assigned whenever info.swiftVersion is, so the read above is safe
+      }
+    }
+  }
+}
+
+void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
+  uint32_t flags = info.hasCategoryClassProperties ? 0x40 : 0x0; // bit 6
+  flags |= info.swiftVersion << 8; // Swift version occupies bits 8-15
+  write32le(buf + 4, flags); // flags word only; bytes 0-3 (version) are not written — NOTE(review): presumably pre-zeroed, confirm
+}
+
 void macho::createSyntheticSymbols() {
   auto addHeaderSymbol = [](const char *name) {
     symtab->addSynthetic(name, in.header->isec, /*value=*/0,

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 4f7d5288c9dcd..afdd46d8a7de4 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -19,6 +19,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/MathExtras.h"
@@ -600,6 +601,27 @@ class WordLiteralSection final : public SyntheticSection {
   std::unordered_map<uint32_t, uint64_t> literal4Map;
 };
 
+class ObjCImageInfoSection final : public SyntheticSection {
+public:
+  ObjCImageInfoSection();
+  bool isNeeded() const override { return !files.empty(); } // true once addFile() ran
+  uint64_t getSize() const override { return 8; } // two 32-bit words: version, flags
+  void addFile(const InputFile *file) { // caller must pass a file with image info
+    assert(!file->objCImageInfo.empty());
+    files.push_back(file);
+  }
+  void finalizeContents(); // parses every added file and merges the results into `info`
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  struct ImageInfo {
+    uint8_t swiftVersion = 0; // 0 = no Swift version recorded
+    bool hasCategoryClassProperties = false;
+  } info; // merged value for the output; serialized by writeTo()
+  static ImageInfo parseImageInfo(const InputFile *); // decodes one file's section bytes
+  std::vector<const InputFile *> files; // files with image info
+};
+
 struct InStruct {
   const uint8_t *bufferStart = nullptr;
   MachHeaderSection *header = nullptr;
@@ -616,6 +638,7 @@ struct InStruct {
   StubsSection *stubs = nullptr;
   StubHelperSection *stubHelper = nullptr;
   UnwindInfoSection *unwindInfo = nullptr;
+  ObjCImageInfoSection *objCImageInfo = nullptr;
   ConcatInputSection *imageLoaderCache = nullptr;
 };
 

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 9395e1a068a35..7fad9f5564ce4 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1164,6 +1164,10 @@ template <class LP> void Writer::run() {
 
   if (in.stubHelper->isNeeded())
     in.stubHelper->setup();
+
+  if (in.objCImageInfo->isNeeded())
+    in.objCImageInfo->finalizeContents();
+
   // At this point, we should know exactly which output sections are needed,
   // courtesy of scanSymbols() and scanRelocations().
   createOutputSections<LP>();
@@ -1210,6 +1214,7 @@ void macho::createSyntheticSections() {
   in.stubs = make<StubsSection>();
   in.stubHelper = make<StubHelperSection>();
   in.unwindInfo = makeUnwindInfoSection();
+  in.objCImageInfo = make<ObjCImageInfoSection>();
 
   // This section contains space for just a single word, and will be used by
   // dyld to cache an address to the image loader it uses.

diff  --git a/lld/test/MachO/builtin-rename.s b/lld/test/MachO/builtin-rename.s
index c1e511bc64d8c..dc9b7f8f75105 100644
--- a/lld/test/MachO/builtin-rename.s
+++ b/lld/test/MachO/builtin-rename.s
@@ -37,7 +37,6 @@
 # NDATA-DAG: __DATA,__objc_catlist __DATA__objc_catlist
 # NDATA-DAG: __DATA,__objc_nlcatlist __DATA__objc_nlcatlist
 # NDATA-DAG: __DATA,__objc_protolist __DATA__objc_protolist
-# NDATA-DAG: __DATA,__objc_imageinfo __DATA__objc_imageinfo
 # NDATA-DAG: __DATA,__nl_symbol_ptr __IMPORT__pointers
 
 # YDATA-DAG: __DATA_CONST,__auth_got __DATA__auth_got
@@ -52,7 +51,6 @@
 # YDATA-DAG: __DATA_CONST,__objc_catlist __DATA__objc_catlist
 # YDATA-DAG: __DATA_CONST,__objc_nlcatlist __DATA__objc_nlcatlist
 # YDATA-DAG: __DATA_CONST,__objc_protolist __DATA__objc_protolist
-# YDATA-DAG: __DATA_CONST,__objc_imageinfo __DATA__objc_imageinfo
 # YDATA-DAG: __DATA_CONST,__nl_symbol_ptr __IMPORT__pointers
 
 ## LLD doesn't support defining symbols in synthetic sections, so we test them
@@ -133,10 +131,14 @@ __DATA__objc_nlcatlist:
 __DATA__objc_protolist:
   .space 8
 
-.section __DATA,__objc_imageinfo
-.global __DATA__objc_imageinfo
-__DATA__objc_imageinfo:
-  .space 8
+## __objc_imageinfo should get moved under __DATA_CONST as well, but symbols
+## within __objc_imageinfo get dropped during link, so we cannot test this
+## case using the output of `llvm-objdump --syms`. TODO: rewrite test to use
+## `llvm-readobj --section-headers`, which will avoid this issue.
+# .section __DATA,__objc_imageinfo
+# .global __DATA__objc_imageinfo
+# __DATA__objc_imageinfo:
+#   .space 8
 
 .section __IMPORT,__pointers,non_lazy_symbol_pointers
 .global __IMPORT__pointers

diff  --git a/lld/test/MachO/objc-imageinfo.s b/lld/test/MachO/objc-imageinfo.s
new file mode 100644
index 0000000000000..0523aac841c77
--- /dev/null
+++ b/lld/test/MachO/objc-imageinfo.s
@@ -0,0 +1,172 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+
+## ld64 ignores the __objc_imageinfo section entirely if there is no actual
+## ObjC class + category data in the file. LLD doesn't yet do this check, but
+## to make this test work for both linkers, I am inserting an appropriate class
+## definition into each test file.
+# RUN: cat %t/no-category-cls.s   %t/foo-cls.s > %t/no-category-cls-1.s
+# RUN: cat %t/with-category-cls.s %t/foo-cls.s > %t/with-category-cls-1.s
+# RUN: cat %t/ignored-flags.s     %t/foo-cls.s > %t/ignored-flags-1.s
+# RUN: cat %t/invalid-version.s   %t/foo-cls.s > %t/invalid-version-1.s
+# RUN: cat %t/invalid-size.s      %t/foo-cls.s > %t/invalid-size-1.s
+# RUN: cat %t/swift-version-1.s   %t/foo-cls.s > %t/swift-version-1-1.s
+# RUN: cat %t/swift-version-2.s   %t/foo-cls.s > %t/swift-version-2-1.s
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-category-cls-1.s   -o %t/no-category-cls.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/with-category-cls-1.s -o %t/with-category-cls.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/ignored-flags-1.s     -o %t/ignored-flags.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid-version-1.s   -o %t/invalid-version.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/swift-version-1-1.s   -o %t/swift-version-1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/swift-version-2-1.s   -o %t/swift-version-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid-size-1.s      -o %t/invalid-size.o
+
+# RUN: %lld -dylib -lSystem %t/with-category-cls.o -o %t/test-with-cat
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" --syms \
+# RUN:   %t/test-with-cat | FileCheck %s --check-prefix=HAS-CAT-CLS \
+# RUN:   --implicit-check-not=_discard_me
+
+# RUN: %lld -dylib -lSystem %t/no-category-cls.o -o %t/test-no-cat
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" --syms \
+# RUN:   %t/test-no-cat | FileCheck %s --check-prefix=NO-CAT-CLS \
+# RUN:   --implicit-check-not=_discard_me
+
+# RUN: %lld -dylib -lSystem %t/no-category-cls.o %t/with-category-cls.o -o %t/test1
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test1 \
+# RUN:   | FileCheck %s --check-prefix=NO-CAT-CLS
+
+# RUN: %lld -dylib -lSystem %t/with-category-cls.o %t/ignored-flags.o -o %t/test2
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test2 \
+# RUN:   | FileCheck %s --check-prefix=HAS-CAT-CLS
+
+# RUN: %lld -dylib -lSystem %t/no-category-cls.o %t/ignored-flags.o -o %t/test3
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test3 \
+# RUN:   | FileCheck %s --check-prefix=NO-CAT-CLS
+
+# RUN: %no-fatal-warnings-lld -dylib -lSystem %t/with-category-cls.o \
+# RUN:   %t/invalid-version.o -o %t/test4 2>&1 | FileCheck %s \
+# RUN:   --check-prefix=IMAGE-VERSION
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test4 \
+# RUN:   | FileCheck %s --check-prefix=NO-CAT-CLS
+
+# RUN: %no-fatal-warnings-lld -dylib -lSystem %t/no-category-cls.o \
+# RUN:   %t/invalid-version.o -o %t/test5 2>&1 | FileCheck %s \
+# RUN:   --check-prefix=IMAGE-VERSION
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test5 \
+# RUN:   | FileCheck %s --check-prefix=NO-CAT-CLS
+
+# RUN: %no-fatal-warnings-lld -dylib -lSystem %t/with-category-cls.o \
+# RUN:   %t/invalid-size.o -o %t/test6 2>&1 | FileCheck %s \
+# RUN:   --check-prefix=INVALID-SIZE
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test6 \
+# RUN:   | FileCheck %s --check-prefix=NO-CAT-CLS
+
+# RUN: not %lld -dylib -lSystem %t/swift-version-1.o %t/swift-version-2.o -o \
+# RUN:   /dev/null 2>&1 | FileCheck %s --check-prefix=SWIFT-MISMATCH-12
+# RUN: not %lld -dylib -lSystem %t/swift-version-2.o %t/swift-version-1.o -o \
+# RUN:   /dev/null 2>&1 | FileCheck %s --check-prefix=SWIFT-MISMATCH-21
+
+## with-category-cls.o does not have a Swift version (it's set to zero) and
+## should be compatible with any Swift version.
+# RUN: %lld -dylib -lSystem %t/with-category-cls.o %t/swift-version-1.o -o %t/swift-v1
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" \
+# RUN:   %t/swift-v1 | FileCheck %s --check-prefix=SWIFT-V1
+# RUN: %lld -dylib -lSystem %t/with-category-cls.o %t/swift-version-2.o -o %t/swift-v2
+# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" \
+# RUN:   %t/swift-v2 | FileCheck %s --check-prefix=SWIFT-V2
+
+# HAS-CAT-CLS:       Contents of (__DATA_CONST,__objc_imageinfo) section
+# HAS-CAT-CLS:       00 00 00 40 00 00 00
+# HAS-CAT-CLS-EMPTY:
+
+# NO-CAT-CLS:       Contents of (__DATA_CONST,__objc_imageinfo) section
+# NO-CAT-CLS:       00 00 00 00 00 00 00
+# NO-CAT-CLS-EMPTY:
+
+# SWIFT-V1:       Contents of (__DATA_CONST,__objc_imageinfo) section
+# SWIFT-V1:       00 00 00 40 01 00 00
+# SWIFT-V1-EMPTY:
+
+# SWIFT-V2:       Contents of (__DATA_CONST,__objc_imageinfo) section
+# SWIFT-V2:       00 00 00 40 02 00 00
+# SWIFT-V2-EMPTY:
+
+# IMAGE-VERSION: warning: {{.*}}invalid-version.o: invalid __objc_imageinfo version
+
+# INVALID-SIZE: warning: {{.*}}invalid-size.o: invalid __objc_imageinfo size
+
+# SWIFT-MISMATCH-12: error: Swift version mismatch: {{.*}}swift-version-1.o has version 1.0 but {{.*}}swift-version-2.o has version 1.1
+# SWIFT-MISMATCH-21: error: Swift version mismatch: {{.*}}swift-version-2.o has version 1.1 but {{.*}}swift-version-1.o has version 1.0
+
+#--- no-category-cls.s
+.section __DATA,__objc_imageinfo,regular,no_dead_strip
+## ld64 discards any symbols in this section; we follow suit.
+_discard_me:
+.long 0
+.long 0
+
+#--- with-category-cls.s
+.section __DATA,__objc_imageinfo,regular,no_dead_strip
+_discard_me:
+.long 0
+.long 0x40 ## "has category class properties" flag
+
+#--- ignored-flags.s
+.section __DATA,__objc_imageinfo,regular,no_dead_strip
+.long 0
+## Only the 0x40 flag is carried through to the output binary.
+.long (0x40 | 0x20 | 0x4 | 0x2)
+
+#--- invalid-version.s
+.section __DATA,__objc_imageinfo,regular,no_dead_strip
+.long 1 ## only 0 is valid; the flag field below will not be parsed.
+.long 0x40
+
+#--- invalid-size.s
+.section __DATA,__objc_imageinfo
+.long 0
+
+#--- swift-version-1.s
+.section __DATA,__objc_imageinfo,regular,no_dead_strip
+.long 0
+.byte 0x40
+.byte 0x1 ## Swift version
+.short 0
+
+#--- swift-version-2.s
+.section __DATA,__objc_imageinfo,regular,no_dead_strip
+.long 0
+.byte 0x40
+.byte 0x2 ## Swift version
+.short 0
+
+#--- foo-cls.s
+.section __TEXT,__objc_classname,cstring_literals
+L_CAT_NAME:
+.asciz "barcat"
+
+.section __DATA,__objc_data
+.p2align 3
+_OBJC_CLASS_$_FooClass:
+.space 40
+
+.section __DATA,__objc_const
+.p2align 3
+__OBJC_$_CATEGORY_INSTANCE_METHODS_FooClass_$_barcat:
+
+.p2align 3
+__OBJC_$_CATEGORY_FooClass_$_barcat:
+.quad L_CAT_NAME
+.quad _OBJC_CLASS_$_FooClass
+.quad __OBJC_$_CATEGORY_INSTANCE_METHODS_FooClass_$_barcat
+.quad 0
+.quad 0
+.quad 0
+.quad 0
+.long 64
+.space 4
+
+.section __DATA,__objc_catlist,regular,no_dead_strip
+.p2align 3
+.quad __OBJC_$_CATEGORY_FooClass_$_barcat


        


More information about the llvm-commits mailing list