[lld] [lld-macho] icf objc stubs (PR #79730)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 27 22:40:20 PST 2024


https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/79730

>From 28db5b14b85e416dece78459d1a389cd50e4f2f9 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sat, 27 Jan 2024 14:25:21 -0800
Subject: [PATCH 1/2] [lld-macho] dead-strip objc stubs

---
 lld/MachO/Driver.cpp                   |  5 ++---
 lld/MachO/SyntheticSections.cpp        | 14 +++++++++++--
 lld/MachO/SyntheticSections.h          |  2 ++
 lld/MachO/Writer.cpp                   | 10 +++++++++-
 lld/test/MachO/arm64-objc-stubs-dead.s | 27 ++++++++++++++++++++++++++
 5 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 lld/test/MachO/arm64-objc-stubs-dead.s

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 411fbcfcf233eb8..519bd483dacb688 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1275,11 +1275,10 @@ static void foldIdenticalLiterals() {
 static void addSynthenticMethnames() {
   std::string &data = *make<std::string>();
   llvm::raw_string_ostream os(data);
-  const int prefixLength = ObjCStubsSection::symbolPrefix.size();
   for (Symbol *sym : symtab->getSymbols())
     if (isa<Undefined>(sym))
-      if (sym->getName().starts_with(ObjCStubsSection::symbolPrefix))
-        os << sym->getName().drop_front(prefixLength) << '\0';
+      if (ObjCStubsSection::isObjCStubSymbol(sym))
+        os << ObjCStubsSection::getMethName(sym) << '\0';
 
   if (data.empty())
     return;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 53220ad04b842c1..42a36e9b13902d3 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -814,9 +814,19 @@ ObjCStubsSection::ObjCStubsSection()
               : target->objcStubsSmallAlignment;
 }
 
+bool ObjCStubsSection::isObjCStubSymbol(Symbol *sym) {
+  return sym->getName().starts_with(symbolPrefix);
+}
+
+StringRef ObjCStubsSection::getMethName(Symbol *sym) {
+  assert(isObjCStubSymbol(sym) && "not an objc stub");
+  auto name = sym->getName();
+  StringRef methname = name.drop_front(symbolPrefix.size());
+  return methname;
+}
+
 void ObjCStubsSection::addEntry(Symbol *sym) {
-  assert(sym->getName().starts_with(symbolPrefix) && "not an objc stub");
-  StringRef methname = sym->getName().drop_front(symbolPrefix.size());
+  StringRef methname = getMethName(sym);
   offsets.push_back(
       in.objcMethnameSection->getStringOffset(methname).outSecOff);
 
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5fb7b6e09e8e63e..aee96b9819e58a4 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -332,6 +332,8 @@ class ObjCStubsSection final : public SyntheticSection {
   void setUp();
 
   static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
+  static bool isObjCStubSymbol(Symbol *sym);
+  static StringRef getMethName(Symbol *sym);
 
 private:
   std::vector<Defined *> symbols;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 1b0e64abe843adf..3634c626f0692d1 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -736,8 +736,16 @@ void Writer::scanSymbols() {
       dysym->getFile()->refState =
           std::max(dysym->getFile()->refState, dysym->getRefState());
     } else if (isa<Undefined>(sym)) {
-      if (sym->getName().starts_with(ObjCStubsSection::symbolPrefix))
+      if (ObjCStubsSection::isObjCStubSymbol(sym)) {
+        // When -dead_strip is enabled, we don't want to emit any dead stubs.
+        // Although this stub symbol is yet undefined, addSym() was called
+        // during MarkLive.
+        if (config->deadStrip) {
+          if (!sym->isLive())
+            continue;
+        }
         in.objcStubs->addEntry(sym);
+      }
     }
   }
 
diff --git a/lld/test/MachO/arm64-objc-stubs-dead.s b/lld/test/MachO/arm64-objc-stubs-dead.s
new file mode 100644
index 000000000000000..5dcb171c17eac58
--- /dev/null
+++ b/lld/test/MachO/arm64-objc-stubs-dead.s
@@ -0,0 +1,27 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+
+# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o
+# RUN: llvm-nm %t.out | FileCheck %s
+# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -dead_strip -o %t.out %t.o
+# RUN: llvm-nm %t.out | FileCheck %s --check-prefix=DEAD
+
+# CHECK: _foo
+# CHECK: _objc_msgSend$length
+
+# DEAD-NOT: _foo
+# DEAD-NOT: _objc_msgSend$length
+
+.section __TEXT,__text
+
+.globl _foo
+_foo:
+  bl  _objc_msgSend$length
+  ret
+
+.globl _main
+_main:
+  ret
+
+.subsections_via_symbols

>From 0d7bb655f49bc4a535bb847c853b4b5ff1f97883 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sat, 27 Jan 2024 20:05:40 -0800
Subject: [PATCH 2/2] [lld-macho] icf objc stubs

---
 lld/MachO/SyntheticSections.cpp    | 68 ++++++++++++++++++++++++++----
 lld/MachO/SyntheticSections.h      |  8 +++-
 lld/MachO/Writer.cpp               |  1 +
 lld/test/MachO/objc-selrefs.s      |  3 --
 lld/test/MachO/x86-64-objc-stubs.s | 10 +++++
 5 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 42a36e9b13902d3..d3674a2b2c5dcc5 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -825,10 +825,43 @@ StringRef ObjCStubsSection::getMethName(Symbol *sym) {
   return methname;
 }
 
+void ObjCStubsSection::initialize() {
+  // Do not fold selrefs without ICF.
+  if (config->icfLevel == ICFLevel::none)
+    return;
+
+  // Search methnames already referenced in __objc_selrefs
+  // Map the name to the corresponding selref entry
+  // which we will reuse when creating objc stubs.
+  for (ConcatInputSection *isec : inputSections) {
+    if (isec->shouldOmitFromOutput())
+      continue;
+    if (isec->getName() != "__objc_selrefs")
+      continue;
+    // We expect a single relocation per selref entry to __objc_methname that
+    // might be aggregated.
+    assert(isec->relocs.size() == 1);
+    auto Reloc = isec->relocs[0];
+    if (const auto *sym = Reloc.referent.dyn_cast<Symbol *>()) {
+      if (const auto *d = dyn_cast<Defined>(sym)) {
+        auto *cisec = cast<CStringInputSection>(d->isec);
+        auto methname = cisec->getStringRefAtOffset(d->value);
+        methnameToselref[methname] = isec;
+      }
+    }
+  }
+}
+
 void ObjCStubsSection::addEntry(Symbol *sym) {
   StringRef methname = getMethName(sym);
-  offsets.push_back(
-      in.objcMethnameSection->getStringOffset(methname).outSecOff);
+  // We will create a selref entry for each unique methname.
+  if (!methnameToselref.count(methname) &&
+      !methnameToidxOffsetPair.count(methname)) {
+    auto methnameOffset =
+        in.objcMethnameSection->getStringOffset(methname).outSecOff;
+    auto selIndex = methnameToidxOffsetPair.size();
+    methnameToidxOffsetPair[methname] = {selIndex, methnameOffset};
+  }
 
   auto stubSize = config->objcStubsMode == ObjCStubsMode::fast
                       ? target->objcStubsFastSize
@@ -863,10 +896,12 @@ void ObjCStubsSection::setUp() {
       in.stubs->addEntry(objcMsgSend);
   }
 
-  size_t size = offsets.size() * target->wordSize;
+  size_t size = methnameToidxOffsetPair.size() * target->wordSize;
   uint8_t *selrefsData = bAlloc().Allocate<uint8_t>(size);
-  for (size_t i = 0, n = offsets.size(); i < n; ++i)
-    write64le(&selrefsData[i * target->wordSize], offsets[i]);
+  for (const auto &[methname, idxOffsetPair] : methnameToidxOffsetPair) {
+    const auto &[idx, offset] = idxOffsetPair;
+    write64le(&selrefsData[idx * target->wordSize], offset);
+  }
 
   in.objcSelrefs =
       makeSyntheticInputSection(segment_names::data, section_names::objcSelrefs,
@@ -875,12 +910,13 @@ void ObjCStubsSection::setUp() {
                                 /*align=*/target->wordSize);
   in.objcSelrefs->live = true;
 
-  for (size_t i = 0, n = offsets.size(); i < n; ++i) {
+  for (const auto &[methname, idxOffsetPair] : methnameToidxOffsetPair) {
+    const auto &[idx, offset] = idxOffsetPair;
     in.objcSelrefs->relocs.push_back(
         {/*type=*/target->unsignedRelocType,
          /*pcrel=*/false, /*length=*/3,
-         /*offset=*/static_cast<uint32_t>(i * target->wordSize),
-         /*addend=*/offsets[i] * in.objcMethnameSection->align,
+         /*offset=*/static_cast<uint32_t>(idx * target->wordSize),
+         /*addend=*/offset * in.objcMethnameSection->align,
          /*referent=*/in.objcMethnameSection->isec});
   }
 
@@ -904,8 +940,22 @@ void ObjCStubsSection::writeTo(uint8_t *buf) const {
   uint64_t stubOffset = 0;
   for (size_t i = 0, n = symbols.size(); i < n; ++i) {
     Defined *sym = symbols[i];
+    uint64_t selBaseAddr;
+    uint64_t selIndex;
+
+    auto methname = getMethName(sym);
+    auto j = methnameToselref.find(methname);
+    if (j != methnameToselref.end()) {
+      selBaseAddr = j->second->getVA(0);
+      selIndex = 0;
+    } else {
+      auto k = methnameToidxOffsetPair.find(methname);
+      assert(k != methnameToIdxOffsetPair.end());
+      selBaseAddr = in.objcSelrefs->getVA();
+      selIndex = k->second.first;
+    }
     target->writeObjCMsgSendStub(buf + stubOffset, sym, in.objcStubs->addr,
-                                 stubOffset, in.objcSelrefs->getVA(), i,
+                                 stubOffset, selBaseAddr, selIndex,
                                  objcMsgSend);
   }
 }
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index aee96b9819e58a4..f5adcf7126ecd64 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -324,6 +324,7 @@ class StubHelperSection final : public SyntheticSection {
 class ObjCStubsSection final : public SyntheticSection {
 public:
   ObjCStubsSection();
+  void initialize();
   void addEntry(Symbol *sym);
   uint64_t getSize() const override;
   bool isNeeded() const override { return !symbols.empty(); }
@@ -337,7 +338,12 @@ class ObjCStubsSection final : public SyntheticSection {
 
 private:
   std::vector<Defined *> symbols;
-  std::vector<uint32_t> offsets;
+  // Existing mapping from methname to selref (0 index is assumed).
+  llvm::StringMap<InputSection *> methnameToselref;
+  // Newly created mapping from methname to the pair of index (selref) and
+  // offset (methname).
+  llvm::MapVector<StringRef, std::pair<uint32_t, uint32_t>>
+      methnameToidxOffsetPair;
   Symbol *objcMsgSend = nullptr;
 };
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 3634c626f0692d1..65b598d1d7c422a 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -720,6 +720,7 @@ static void addNonWeakDefinition(const Defined *defined) {
 
 void Writer::scanSymbols() {
   TimeTraceScope timeScope("Scan symbols");
+  in.objcStubs->initialize();
   for (Symbol *sym : symtab->getSymbols()) {
     if (auto *defined = dyn_cast<Defined>(sym)) {
       if (!defined->isLive())
diff --git a/lld/test/MachO/objc-selrefs.s b/lld/test/MachO/objc-selrefs.s
index 9dff440727ac34b..6d144f4938b4542 100644
--- a/lld/test/MachO/objc-selrefs.s
+++ b/lld/test/MachO/objc-selrefs.s
@@ -24,7 +24,6 @@
 # SELREFS-NEXT: __TEXT:__objc_methname:length
 # SELREFS-EMPTY:
 
-## We don't yet support dedup'ing implicitly-defined selrefs.
 # RUN: %lld -dylib -arch arm64 -lSystem --icf=all -o %t/explicit-and-implicit \
 # RUN:   %t/explicit-selrefs-1.o %t/explicit-selrefs-2.o %t/implicit-selrefs.o
 # RUN: llvm-otool -vs __DATA __objc_selrefs %t/explicit-and-implicit \
@@ -43,8 +42,6 @@
 # EXPLICIT-AND-IMPLICIT:      Contents of (__DATA,__objc_selrefs) section
 # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo
 # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:bar
-# NOTE: Ideally this wouldn't exist, but while it does it needs to point to the deduplicated string
-# EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo
 # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:length
 
 #--- explicit-selrefs-1.s
diff --git a/lld/test/MachO/x86-64-objc-stubs.s b/lld/test/MachO/x86-64-objc-stubs.s
index 2dd8d559377156b..1001d94b8d2cd12 100644
--- a/lld/test/MachO/x86-64-objc-stubs.s
+++ b/lld/test/MachO/x86-64-objc-stubs.s
@@ -10,6 +10,9 @@
 
 # WARNING: warning: -objc_stubs_small is not yet implemented, defaulting to -objc_stubs_fast
 
+# RUN: %lld -arch x86_64 -lSystem -o %t-icf.out --icf=all %t.o
+# RUN: llvm-otool -vs __DATA __objc_selrefs %t-icf.out | FileCheck %s --check-prefix=ICF
+
 # CHECK: Sections:
 # CHECK: __got            {{[0-9a-f]*}} [[#%x, GOTSTART:]] DATA
 # CHECK: __objc_selrefs   {{[0-9a-f]*}} [[#%x, SELSTART:]] DATA
@@ -21,6 +24,13 @@
 # CHECK-NEXT: [[#%x, FOOSELREF:]]  __TEXT:__objc_methname:foo
 # CHECK-NEXT: [[#%x, LENGTHSELREF:]]  __TEXT:__objc_methname:length
 
+# ICF: Contents of (__DATA,__objc_selrefs) section
+
+# ICF-NEXT: {{[0-9a-f]*}}  __TEXT:__objc_methname:foo
+# ICF-NEXT: {{[0-9a-f]*}}  __TEXT:__objc_methname:bar
+# ICF-NEXT: {{[0-9a-f]*}}  __TEXT:__objc_methname:length
+# ICF-EMPTY:
+
 # CHECK: Contents of (__TEXT,__objc_stubs) section
 
 # CHECK-NEXT: _objc_msgSend$foo:



More information about the llvm-commits mailing list