[lld] c32e69b - [lld-macho][re-land] Initial support for common symbols

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 24 15:00:28 PDT 2020


Author: Jez Ng
Date: 2020-09-24T15:00:20-07:00
New Revision: c32e69b2ce7abfb151a87ba363ac9e25abf7d417

URL: https://github.com/llvm/llvm-project/commit/c32e69b2ce7abfb151a87ba363ac9e25abf7d417
DIFF: https://github.com/llvm/llvm-project/commit/c32e69b2ce7abfb151a87ba363ac9e25abf7d417.diff

LOG: [lld-macho][re-land] Initial support for common symbols

Fix earlier build break via a static_cast.

This reverts commit 8112d494d344dc0935d5c078f066a43d7c984e0c.

Differential Revision: https://reviews.llvm.org/D86909

Added: 
    lld/test/MachO/common-symbol-coalescing.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 02a7ccca7fbb..c248110e89f7 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -436,6 +436,34 @@ static bool markSubLibrary(StringRef searchName) {
   return false;
 }
 
+// Replaces common symbols with defined symbols residing in __common sections.
+// This function must be called after all symbol names are resolved (i.e. after
+// all InputFiles have been loaded.) As a result, later operations won't see
+// any CommonSymbols.
+static void replaceCommonSymbols() {
+  for (macho::Symbol *sym : symtab->getSymbols()) {
+    auto *common = dyn_cast<CommonSymbol>(sym);
+    if (common == nullptr)
+      continue;
+
+    auto *isec = make<InputSection>();
+    isec->file = common->file;
+    isec->name = section_names::common;
+    isec->segname = segment_names::data;
+    isec->align = common->align;
+    // Casting to size_t will truncate large values on 32-bit architectures,
+    // but it's not really worth supporting the linking of 64-bit programs on
+    // 32-bit archs.
+    isec->data = {nullptr, static_cast<size_t>(common->size)};
+    isec->flags = S_ZEROFILL;
+    inputSections.push_back(isec);
+
+    replaceSymbol<Defined>(sym, sym->getName(), isec, /*value=*/0,
+                           /*isWeakDef=*/false,
+                           /*isExternal=*/true);
+  }
+}
+
 static inline char toLowerDash(char x) {
   if (x >= 'A' && x <= 'Z')
     return x - 'A' + 'a';
@@ -641,6 +669,8 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
       error("-sub_library " + searchName + " does not match a supplied dylib");
   }
 
+  replaceCommonSymbols();
+
   StringRef orderFile = args.getLastArgValue(OPT_order_file);
   if (!orderFile.empty())
     parseOrderFile(orderFile);

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 14eb1c431005..ad2d6a25849a 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -243,10 +243,12 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
   for (size_t i = 0, n = nList.size(); i < n; ++i) {
     const structs::nlist_64 &sym = nList[i];
 
-    // Undefined symbol
-    if (!sym.n_sect) {
+    if ((sym.n_type & N_TYPE) == N_UNDF) {
       StringRef name = strtab + sym.n_strx;
-      symbols[i] = symtab->addUndefined(name);
+      symbols[i] = sym.n_value == 0
+                       ? symtab->addUndefined(name)
+                       : symtab->addCommon(name, this, sym.n_value,
+                                           1 << GET_COMM_ALIGN(sym.n_desc));
       continue;
     }
 

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index cfb35718e7ec..6719981b5dff 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -74,6 +74,26 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
   return s;
 }
 
+Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
+                               uint32_t align) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(name);
+
+  if (!wasInserted) {
+    if (auto *common = dyn_cast<CommonSymbol>(s)) {
+      if (size < common->size)
+        return s;
+    } else if (!isa<Undefined>(s)) {
+      error("TODO: implement common symbol resolution with other symbol kinds");
+      return s;
+    }
+  }
+
+  replaceSymbol<CommonSymbol>(s, name, file, size, align);
+  return s;
+}
+
 Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
                               bool isTlv) {
   Symbol *s;

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 178b26f48599..b4c7ec8604ff 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -19,6 +19,7 @@ namespace macho {
 
 class ArchiveFile;
 class DylibFile;
+class InputFile;
 class InputSection;
 class MachHeaderSection;
 class Symbol;
@@ -36,6 +37,8 @@ class SymbolTable {
 
   Symbol *addUndefined(StringRef name);
 
+  Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align);
+
   Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv);
 
   Symbol *addLazy(StringRef name, ArchiveFile *file,

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 8feab5094f97..14c1ee813420 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -14,6 +14,7 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Strings.h"
 #include "llvm/Object/Archive.h"
+#include "llvm/Support/MathExtras.h"
 
 namespace lld {
 namespace macho {
@@ -36,6 +37,7 @@ class Symbol {
   enum Kind {
     DefinedKind,
     UndefinedKind,
+    CommonKind,
     DylibKind,
     LazyKind,
     DSOHandleKind,
@@ -117,6 +119,35 @@ class Undefined : public Symbol {
   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
 };
 
+// On Unix, it is traditionally allowed to write variable definitions without
+// initialization expressions (such as "int foo;") to header files. These are
+// called tentative definitions.
+//
+// Using tentative definitions is usually considered a bad practice; you should
+// write only declarations (such as "extern int foo;") to header files.
+// Nevertheless, the linker and the compiler have to do something to support
+// bad code by allowing duplicate definitions for this particular case.
+//
+// The compiler creates common symbols when it sees tentative definitions.
+// (You can suppress this behavior and let the compiler create a regular
+// defined symbol by passing -fno-common.) When linking the final binary, if
+// there are remaining common symbols after name resolution is complete, the
+// linker converts them to regular defined symbols in a __common section.
+class CommonSymbol : public Symbol {
+public:
+  CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align)
+      : Symbol(CommonKind, name), file(file), size(size),
+        align(align != 1 ? align : llvm::PowerOf2Ceil(size)) {
+    // TODO: cap maximum alignment
+  }
+
+  static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
+
+  InputFile *const file;
+  const uint64_t size;
+  const uint32_t align;
+};
+
 class DylibSymbol : public Symbol {
 public:
   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
@@ -185,8 +216,10 @@ class DSOHandle : public Symbol {
 union SymbolUnion {
   alignas(Defined) char a[sizeof(Defined)];
   alignas(Undefined) char b[sizeof(Undefined)];
-  alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
-  alignas(LazySymbol) char d[sizeof(LazySymbol)];
+  alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
+  alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
+  alignas(LazySymbol) char e[sizeof(LazySymbol)];
+  alignas(DSOHandle) char f[sizeof(DSOHandle)];
 };
 
 template <typename T, typename... ArgT>

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 90861839e4f3..55fd15ea8aff 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -26,6 +26,7 @@ namespace macho {
 namespace section_names {
 
 constexpr const char pageZero[] = "__pagezero";
+constexpr const char common[] = "__common";
 constexpr const char header[] = "__mach_header";
 constexpr const char binding[] = "__binding";
 constexpr const char weakBinding[] = "__weak_binding";

diff  --git a/lld/test/MachO/common-symbol-coalescing.s b/lld/test/MachO/common-symbol-coalescing.s
new file mode 100644
index 000000000000..88cec5615cf1
--- /dev/null
+++ b/lld/test/MachO/common-symbol-coalescing.s
@@ -0,0 +1,83 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/same-size.s -o %t/same-size.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/smaller-size.s -o %t/smaller-size.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align.s -o %t/zero-align.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align-round-up.s -o %t/zero-align-round-up.o
+
+## Check that we pick the definition with the larger size, regardless of
+## its alignment.
+# RUN: lld -flavor darwinnew %t/test.o %t/smaller-size.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/smaller-size.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+
+## When the sizes are equal, we pick the symbol whose file occurs later in the
+## command-line argument list.
+# RUN: lld -flavor darwinnew %t/test.o %t/same-size.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/same-size.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+
+# RUN: lld -flavor darwinnew %t/test.o %t/zero-align.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/zero-align.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+
+# RUN: lld -flavor darwinnew %t/test.o %t/zero-align-round-up.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/zero-align-round-up.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+
+# SMALLER-ALIGNMENT-LABEL: Sections:
+# SMALLER-ALIGNMENT:       __common      {{[0-9a-f]+}} [[#%x, COMMON_START:]]  BSS
+
+# SMALLER-ALIGNMENT-LABEL: SYMBOL TABLE:
+# SMALLER-ALIGNMENT-DAG:   [[#COMMON_START]]      g     O __DATA,__common _check_size
+# SMALLER-ALIGNMENT-DAG:   [[#COMMON_START + 2]]  g     O __DATA,__common _end_marker
+# SMALLER-ALIGNMENT-DAG:   [[#COMMON_START + 8]]  g     O __DATA,__common _check_alignment
+
+# LARGER-ALIGNMENT-LABEL: Sections:
+# LARGER-ALIGNMENT:       __common      {{[0-9a-f]+}} [[#%x, COMMON_START:]]  BSS
+
+# LARGER-ALIGNMENT-LABEL: SYMBOL TABLE:
+# LARGER-ALIGNMENT-DAG:   [[#COMMON_START]]      g     O __DATA,__common _check_size
+# LARGER-ALIGNMENT-DAG:   [[#COMMON_START + 2]]  g     O __DATA,__common _end_marker
+# LARGER-ALIGNMENT-DAG:   [[#COMMON_START + 16]] g     O __DATA,__common _check_alignment
+
+#--- order
+## Order is important as we determine the size of a given symbol via the
+## address of the next symbol.
+_check_size
+_end_marker
+_check_alignment
+
+#--- smaller-size.s
+.comm _check_size, 1, 1
+.comm _check_alignment, 1, 4
+
+#--- same-size.s
+.comm _check_size, 2, 1
+.comm _check_alignment, 2, 4
+
+#--- zero-align.s
+.comm _check_size, 2, 1
+## If alignment is set to zero, use the size to determine the alignment.
+.comm _check_alignment, 16, 0
+
+#--- zero-align-round-up.s
+.comm _check_size, 2, 1
+## If alignment is set to zero, use the size to determine the alignment. If the
+## size is not a power of two, round it up. (In this case, 14 rounds to 16.)
+.comm _check_alignment, 14, 0
+
+#--- test.s
+.comm _check_size, 2, 1
+.comm _end_marker, 1
+.comm _check_alignment, 2, 3
+
+.globl _main
+_main:
+  ret


        


More information about the llvm-commits mailing list