[lld] 51ed383 - [lld-macho] Make relative method lists work on x86-64 (#103905)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 11:42:10 PDT 2024


Author: Daniel Bertalan
Date: 2024-08-14T20:42:07+02:00
New Revision: 51ed383d3803051922ab5f7ff19a38454a415ddb

URL: https://github.com/llvm/llvm-project/commit/51ed383d3803051922ab5f7ff19a38454a415ddb
DIFF: https://github.com/llvm/llvm-project/commit/51ed383d3803051922ab5f7ff19a38454a415ddb.diff

LOG: [lld-macho] Make relative method lists work on x86-64 (#103905)

Local data is referenced in Objective-C metadata via section + offset
relocations on x86-64 rather than via symbols. Without this change, we
would crash on incorrect casts of the referents to `Defined`.

A basic test based on the existing `objc-relative-method-lists-simple.s`
adapted to x86-64 is added.

Added: 
    lld/test/MachO/objc-relative-method-lists-simple-x86.s

Modified: 
    lld/MachO/ObjC.cpp
    lld/MachO/Relocations.cpp
    lld/MachO/Relocations.h
    lld/MachO/SyntheticSections.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 9c056f40aa943f..18b7521ed1ad2a 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -186,28 +186,6 @@ ObjcCategoryChecker::ObjcCategoryChecker()
       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
       methodLayout(target->wordSize) {}
 
-// \p r must point to an offset within a CStringInputSection or a
-// ConcatInputSection
-static StringRef getReferentString(const Reloc &r) {
-  if (auto *isec = r.referent.dyn_cast<InputSection *>())
-    return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
-
-  auto *sym = cast<Defined>(r.referent.get<Symbol *>());
-  auto *symIsec = sym->isec();
-  auto symOffset = sym->value + r.addend;
-
-  if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
-    return s->getStringRefAtOffset(symOffset);
-
-  if (isa<ConcatInputSection>(symIsec)) {
-    auto strData = symIsec->data.slice(symOffset);
-    const char *pszData = reinterpret_cast<const char *>(strData.data());
-    return StringRef(pszData, strnlen(pszData, strData.size()));
-  }
-
-  llvm_unreachable("unknown reference section in getReferentString");
-}
-
 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
                                        const Symbol *methodContainerSym,
                                        const ConcatInputSection *containerIsec,
@@ -219,7 +197,7 @@ void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
         methodLayout.nameOffset)
       continue;
 
-    CachedHashStringRef methodName(getReferentString(r));
+    CachedHashStringRef methodName(r.getReferentString());
     // +load methods are special: all implementations are called by the runtime
     // even if they are part of the same class. Thus there is no need to check
     // for duplicates.
@@ -251,14 +229,14 @@ void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
                          ->getReferentInputSection();
       nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
     }
-    StringRef containerName = getReferentString(*nameReloc);
+    StringRef containerName = nameReloc->getReferentString();
     StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
 
     // We should only ever encounter collisions when parsing category methods
     // (since the Class struct is parsed before any of its categories).
     assert(mcKind == MCK_Category);
     StringRef newCatName =
-        getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset));
+        containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString();
 
     auto formatObjAndSrcFileName = [](const InputSection *section) {
       lld::macho::InputFile *inputFile = section->getFile();
@@ -809,7 +787,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
   assert(extInfo.objFileForMergeData &&
          "Expected to already have valid objextInfo.objFileForMergeData");
 
-  StringRef catName = getReferentString(*catNameReloc);
+  StringRef catName = catNameReloc->getReferentString();
   extInfo.mergedContainerName += catName.str();
 
   // Parse base class

diff  --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index afe7f454e6a230..e8ede19d1fda87 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -31,6 +31,31 @@ InputSection *Reloc::getReferentInputSection() const {
   }
 }
 
+StringRef Reloc::getReferentString() const {
+  if (auto *isec = referent.dyn_cast<InputSection *>()) {
+    const auto *cisec = dyn_cast<CStringInputSection>(isec);
+    assert(cisec && "referent must be a CStringInputSection");
+    return cisec->getStringRefAtOffset(addend);
+  }
+
+  auto *sym = dyn_cast<Defined>(referent.get<Symbol *>());
+  assert(sym && "referent must be a Defined symbol");
+
+  auto *symIsec = sym->isec();
+  auto symOffset = sym->value + addend;
+
+  if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
+    return s->getStringRefAtOffset(symOffset);
+
+  if (isa<ConcatInputSection>(symIsec)) {
+    auto strData = symIsec->data.slice(symOffset);
+    const char *pszData = reinterpret_cast<const char *>(strData.data());
+    return StringRef(pszData, strnlen(pszData, strData.size()));
+  }
+
+  llvm_unreachable("unknown reference section in getReferentString");
+}
+
 bool macho::validateSymbolRelocation(const Symbol *sym,
                                      const InputSection *isec, const Reloc &r) {
   const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);

diff  --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index 5f161c8fcbfde3..b2f621451349ee 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -69,6 +69,10 @@ struct Reloc {
         addend(addend), referent(referent) {}
 
   InputSection *getReferentInputSection() const;
+
+  // Must point to an offset within a CStringInputSection or a
+  // ConcatInputSection.
+  llvm::StringRef getReferentString() const;
 };
 
 bool validateSymbolRelocation(const Symbol *, const InputSection *,

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 3d77835d117efe..6b4ec4989ca4a1 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -2010,11 +2010,8 @@ void ObjCMethListSection::setUp() {
     while (methodNameOff < isec->data.size()) {
       const Reloc *reloc = isec->getRelocAt(methodNameOff);
       assert(reloc && "Relocation expected at method list name slot");
-      auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
-      assert(def && "Expected valid Defined at method list name slot");
-      auto *cisec = cast<CStringInputSection>(def->isec());
-      assert(cisec && "Expected method name to be in a CStringInputSection");
-      auto methname = cisec->getStringRefAtOffset(def->value);
+
+      StringRef methname = reloc->getReferentString();
       if (!ObjCSelRefsHelper::getSelRef(methname))
         ObjCSelRefsHelper::makeSelRef(methname);
 
@@ -2114,19 +2111,23 @@ void ObjCMethListSection::writeRelativeOffsetForIsec(
     uint32_t &outSecOff, bool useSelRef) const {
   const Reloc *reloc = isec->getRelocAt(inSecOff);
   assert(reloc && "Relocation expected at __objc_methlist Offset");
-  auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
-  assert(def && "Expected all syms in __objc_methlist to be defined");
-  uint32_t symVA = def->getVA();
 
+  uint32_t symVA = 0;
   if (useSelRef) {
-    auto *cisec = cast<CStringInputSection>(def->isec());
-    auto methname = cisec->getStringRefAtOffset(def->value);
+    StringRef methname = reloc->getReferentString();
     ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
     assert(selRef && "Expected all selector names to already be already be "
                      "present in __objc_selrefs");
     symVA = selRef->getVA();
     assert(selRef->data.size() == sizeof(target->wordSize) &&
            "Expected one selref per ConcatInputSection");
+  } else if (reloc->referent.is<Symbol *>()) {
+    auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
+    assert(def && "Expected all syms in __objc_methlist to be defined");
+    symVA = def->getVA();
+  } else {
+    auto *isec = reloc->referent.get<InputSection *>();
+    symVA = isec->getVA(reloc->addend);
   }
 
   uint32_t currentVA = isec->getVA() + outSecOff;

diff  --git a/lld/test/MachO/objc-relative-method-lists-simple-x86.s b/lld/test/MachO/objc-relative-method-lists-simple-x86.s
new file mode 100644
index 00000000000000..8ad9c0f5f60f82
--- /dev/null
+++ b/lld/test/MachO/objc-relative-method-lists-simple-x86.s
@@ -0,0 +1,255 @@
+# REQUIRES: x86
+# UNSUPPORTED: target=arm{{.*}}-unknown-linux-gnueabihf
+# RUN: rm -rf %t; split-file %s %t && cd %t
+
+## Compile rel_dylib.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o rel_dylib.o simple_class.s
+
+## Test relative method lists
+# RUN: %no-lsystem-lld rel_dylib.o -o rel_dylib.dylib -map rel_dylib.map -dylib -objc_relative_method_lists
+# RUN: llvm-objdump --macho --objc-meta-data rel_dylib.dylib  | FileCheck %s --check-prefix=CHK_REL
+
+## Test relative method lists + dead-strip
+# RUN: %no-lsystem-lld rel_dylib.o -o rel_dylib.dylib -map rel_dylib.map -dylib -objc_relative_method_lists -dead_strip
+# RUN: llvm-objdump --macho --objc-meta-data rel_dylib.dylib  | FileCheck %s --check-prefix=CHK_REL
+
+## Test traditional method lists (no relative offsets)
+# RUN: %no-lsystem-lld rel_dylib.o -o rel_dylib.dylib -map rel_dylib.map -dylib -no_objc_relative_method_lists
+# RUN: llvm-objdump --macho --objc-meta-data rel_dylib.dylib  | FileCheck %s --check-prefix=CHK_NO_REL
+
+
+CHK_REL:       Contents of (__DATA_CONST,__objc_classlist) section
+CHK_REL-NEXT:  _OBJC_CLASS_$_MyClass
+CHK_REL:       baseMethods
+CHK_REL-NEXT:  entsize 12 (relative)
+CHK_REL-NEXT:  count 3
+CHK_REL-NEXT:   name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_00
+CHK_REL-NEXT:  types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16 at 0:8
+CHK_REL-NEXT:    imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_00]
+CHK_REL-NEXT:   name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_01
+CHK_REL-NEXT:  types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16 at 0:8
+CHK_REL-NEXT:    imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_01]
+CHK_REL-NEXT:   name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_02
+CHK_REL-NEXT:  types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16 at 0:8
+CHK_REL-NEXT:    imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_02]
+
+CHK_REL:       Meta Class
+CHK_REL-NEXT:  isa 0x{{[0-9a-f]*}} _OBJC_METACLASS_$_MyClass
+CHK_REL:       baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *)
+CHK_REL-NEXT:  entsize 12 (relative)
+CHK_REL-NEXT:  count 3
+CHK_REL-NEXT:   name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  class_method_00
+CHK_REL-NEXT:  types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  v16 at 0:8
+CHK_REL-NEXT:    imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  +[MyClass class_method_00]
+CHK_REL-NEXT:   name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  class_method_01
+CHK_REL-NEXT:  types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  v16 at 0:8
+CHK_REL-NEXT:    imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  +[MyClass class_method_01]
+CHK_REL-NEXT:   name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  class_method_02
+CHK_REL-NEXT:  types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  v16 at 0:8
+CHK_REL-NEXT:    imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}})  +[MyClass class_method_02]
+
+
+CHK_NO_REL-NOT: (relative)
+
+CHK_NO_REL:           Contents of (__DATA_CONST,__objc_classlist) section
+CHK_NO_REL-NEXT:      _OBJC_CLASS_$_MyClass
+
+CHK_NO_REL:            baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *)
+CHK_NO_REL-NEXT:		   entsize 24
+CHK_NO_REL-NEXT:		     count 3
+CHK_NO_REL-NEXT:		      name 0x{{[0-9a-f]*}} instance_method_00
+CHK_NO_REL-NEXT:		     types 0x{{[0-9a-f]*}} v16 at 0:8
+CHK_NO_REL-NEXT:		       imp -[MyClass instance_method_00]
+CHK_NO_REL-NEXT:		      name 0x{{[0-9a-f]*}} instance_method_01
+CHK_NO_REL-NEXT:		     types 0x{{[0-9a-f]*}} v16 at 0:8
+CHK_NO_REL-NEXT:		       imp -[MyClass instance_method_01]
+CHK_NO_REL-NEXT:		      name 0x{{[0-9a-f]*}} instance_method_02
+CHK_NO_REL-NEXT:		     types 0x{{[0-9a-f]*}} v16 at 0:8
+CHK_NO_REL-NEXT:		       imp -[MyClass instance_method_02]
+
+
+CHK_NO_REL:             Meta Class
+CHK_NO_REL-NEXT:        _OBJC_METACLASS_$_MyClass
+
+CHK_NO_REL:             baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *)
+CHK_NO_REL-NEXT:		   entsize 24
+CHK_NO_REL-NEXT:		     count 3
+CHK_NO_REL-NEXT:		      name 0x{{[0-9a-f]*}} class_method_00
+CHK_NO_REL-NEXT:		     types 0x{{[0-9a-f]*}} v16 at 0:8
+CHK_NO_REL-NEXT:		       imp +[MyClass class_method_00]
+CHK_NO_REL-NEXT:		      name 0x{{[0-9a-f]*}} class_method_01
+CHK_NO_REL-NEXT:		     types 0x{{[0-9a-f]*}} v16 at 0:8
+CHK_NO_REL-NEXT:		       imp +[MyClass class_method_01]
+CHK_NO_REL-NEXT:		      name 0x{{[0-9a-f]*}} class_method_02
+CHK_NO_REL-NEXT:		     types 0x{{[0-9a-f]*}} v16 at 0:8
+CHK_NO_REL-NEXT:		       imp +[MyClass class_method_02]
+
+
+######################## Generate simple_class.s #########################
+# clang -c simple_class.mm -s -o simple_class.s -target x86_64-apple-macos11 -Oz
+
+########################       simple_class.mm       ########################
+#  __attribute__((objc_root_class))
+#  @interface MyClass
+#  - (void)instance_method_00;
+#  - (void)instance_method_01;
+#  - (void)instance_method_02;
+#  + (void)class_method_00;
+#  + (void)class_method_01;
+#  + (void)class_method_02;
+#  @end
+#
+#  @implementation MyClass
+#  - (void)instance_method_00 {}
+#  - (void)instance_method_01 {}
+#  - (void)instance_method_02 {}
+#  + (void)class_method_00 {}
+#  + (void)class_method_01 {}
+#  + (void)class_method_02 {}
+#  @end
+#
+#  void *_objc_empty_cache;
+#
+
+#--- objc-macros.s
+.macro .objc_selector_def name
+	.p2align	2
+"\name":
+	.cfi_startproc
+	ret
+	.cfi_endproc
+.endm
+
+#--- simple_class.s
+.include "objc-macros.s"
+
+.section	__TEXT,__text,regular,pure_instructions
+.build_version macos, 11, 0
+
+.objc_selector_def "-[MyClass instance_method_00]"
+.objc_selector_def "-[MyClass instance_method_01]"
+.objc_selector_def "-[MyClass instance_method_02]"
+
+.objc_selector_def "+[MyClass class_method_00]"
+.objc_selector_def "+[MyClass class_method_01]"
+.objc_selector_def "+[MyClass class_method_02]"
+
+.section	__DATA,__objc_data
+.globl	_OBJC_CLASS_$_MyClass
+.p2align	3, 0x0
+_OBJC_CLASS_$_MyClass:
+	.quad	_OBJC_METACLASS_$_MyClass
+	.quad	0
+	.quad	__objc_empty_cache
+	.quad	0
+	.quad	__OBJC_CLASS_RO_$_MyClass
+
+	.globl	_OBJC_METACLASS_$_MyClass
+	.p2align	3, 0x0
+_OBJC_METACLASS_$_MyClass:
+	.quad	_OBJC_METACLASS_$_MyClass
+	.quad	_OBJC_CLASS_$_MyClass
+	.quad	__objc_empty_cache
+	.quad	0
+	.quad	__OBJC_METACLASS_RO_$_MyClass
+
+	.section	__TEXT,__objc_classname,cstring_literals
+L_OBJC_CLASS_NAME_:
+	.asciz	"MyClass"
+
+	.section	__TEXT,__objc_methname,cstring_literals
+L_OBJC_METH_VAR_NAME_:
+	.asciz	"class_method_00"
+
+	.section	__TEXT,__objc_methtype,cstring_literals
+L_OBJC_METH_VAR_TYPE_:
+	.asciz	"v16 at 0:8"
+
+	.section	__TEXT,__objc_methname,cstring_literals
+L_OBJC_METH_VAR_NAME_.1:
+	.asciz	"class_method_01"
+
+L_OBJC_METH_VAR_NAME_.2:
+	.asciz	"class_method_02"
+
+	.section	__DATA,__objc_const
+	.p2align	3, 0x0
+__OBJC_$_CLASS_METHODS_MyClass:
+	.long	24
+	.long	3
+	.quad	L_OBJC_METH_VAR_NAME_
+	.quad	L_OBJC_METH_VAR_TYPE_
+	.quad	"+[MyClass class_method_00]"
+	.quad	L_OBJC_METH_VAR_NAME_.1
+	.quad	L_OBJC_METH_VAR_TYPE_
+	.quad	"+[MyClass class_method_01]"
+	.quad	L_OBJC_METH_VAR_NAME_.2
+	.quad	L_OBJC_METH_VAR_TYPE_
+	.quad	"+[MyClass class_method_02]"
+
+	.p2align	3, 0x0
+__OBJC_METACLASS_RO_$_MyClass:
+	.long	3
+	.long	40
+	.long	40
+	.space	4
+	.quad	0
+	.quad	L_OBJC_CLASS_NAME_
+	.quad	__OBJC_$_CLASS_METHODS_MyClass
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.section	__TEXT,__objc_methname,cstring_literals
+L_OBJC_METH_VAR_NAME_.3:
+	.asciz	"instance_method_00"
+
+L_OBJC_METH_VAR_NAME_.4:
+	.asciz	"instance_method_01"
+
+L_OBJC_METH_VAR_NAME_.5:
+	.asciz	"instance_method_02"
+
+	.section	__DATA,__objc_const
+	.p2align	3, 0x0
+__OBJC_$_INSTANCE_METHODS_MyClass:
+	.long	24
+	.long	3
+	.quad	L_OBJC_METH_VAR_NAME_.3
+	.quad	L_OBJC_METH_VAR_TYPE_
+	.quad	"-[MyClass instance_method_00]"
+	.quad	L_OBJC_METH_VAR_NAME_.4
+	.quad	L_OBJC_METH_VAR_TYPE_
+	.quad	"-[MyClass instance_method_01]"
+	.quad	L_OBJC_METH_VAR_NAME_.5
+	.quad	L_OBJC_METH_VAR_TYPE_
+	.quad	"-[MyClass instance_method_02]"
+
+	.p2align	3, 0x0
+__OBJC_CLASS_RO_$_MyClass:
+	.long	2
+	.long	0
+	.long	0
+	.space	4
+	.quad	0
+	.quad	L_OBJC_CLASS_NAME_
+	.quad	__OBJC_$_INSTANCE_METHODS_MyClass
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.globl	__objc_empty_cache
+.zerofill __DATA,__common,__objc_empty_cache,8,3
+	.section	__DATA,__objc_classlist,regular,no_dead_strip
+	.p2align	3, 0x0
+l_OBJC_LABEL_CLASS_$:
+	.quad	_OBJC_CLASS_$_MyClass
+
+	.section	__DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+	.long	0
+	.long	64
+
+.subsections_via_symbols


        


More information about the llvm-commits mailing list