[lld] 63ace77 - [lld-macho] Initial support for common symbols

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 23 19:26:51 PDT 2020


Author: Jez Ng
Date: 2020-09-23T19:26:40-07:00
New Revision: 63ace77962543f961f1d566dd1243b1fb37129ef

URL: https://github.com/llvm/llvm-project/commit/63ace77962543f961f1d566dd1243b1fb37129ef
DIFF: https://github.com/llvm/llvm-project/commit/63ace77962543f961f1d566dd1243b1fb37129ef.diff

LOG: [lld-macho] Initial support for common symbols

On Unix, it is traditionally allowed to write variable definitions without
initialization expressions (such as "int foo;") to header files. These are
called tentative definitions.

The compiler creates common symbols when it sees tentative definitions. When
linking the final binary, if there are remaining common symbols after name
resolution is complete, the linker converts them to regular defined symbols in
a `__common` section.

This diff implements most of that functionality, though we do not yet handle
the case where there are both common and non-common definitions of the same
symbol.

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D86909

Added: 
    lld/test/MachO/common-symbol-coalescing.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index a6e2f5c102e08..cc0211a967fa6 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -417,6 +417,31 @@ static bool markSubLibrary(StringRef searchName) {
   return false;
 }
 
+// Replaces common symbols with defined symbols residing in __common sections.
+// This function must be called after all symbol names are resolved (i.e. after
+// all InputFiles have been loaded.) As a result, later operations won't see
+// any CommonSymbols.
+static void replaceCommonSymbols() {
+  for (macho::Symbol *sym : symtab->getSymbols()) {
+    auto *common = dyn_cast<CommonSymbol>(sym);
+    if (common == nullptr)
+      continue;
+
+    auto *isec = make<InputSection>();
+    isec->file = common->file;
+    isec->name = section_names::common;
+    isec->segname = segment_names::data;
+    isec->align = common->align;
+    isec->data = {nullptr, common->size};
+    isec->flags = S_ZEROFILL;
+    inputSections.push_back(isec);
+
+    replaceSymbol<Defined>(sym, sym->getName(), isec, /*value=*/0,
+                           /*isWeakDef=*/false,
+                           /*isExternal=*/true);
+  }
+}
+
 static inline char toLowerDash(char x) {
   if (x >= 'A' && x <= 'Z')
     return x - 'A' + 'a';
@@ -616,6 +641,8 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
       error("-sub_library " + searchName + " does not match a supplied dylib");
   }
 
+  replaceCommonSymbols();
+
   StringRef orderFile = args.getLastArgValue(OPT_order_file);
   if (!orderFile.empty())
     parseOrderFile(orderFile);

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 670dbf59ea9cf..8516e90cffc30 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -243,10 +243,12 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
   for (size_t i = 0, n = nList.size(); i < n; ++i) {
     const structs::nlist_64 &sym = nList[i];
 
-    // Undefined symbol
-    if (!sym.n_sect) {
+    if ((sym.n_type & N_TYPE) == N_UNDF) {
       StringRef name = strtab + sym.n_strx;
-      symbols[i] = symtab->addUndefined(name);
+      symbols[i] = sym.n_value == 0
+                       ? symtab->addUndefined(name)
+                       : symtab->addCommon(name, this, sym.n_value,
+                                           1 << GET_COMM_ALIGN(sym.n_desc));
       continue;
     }
 

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index cfb35718e7ec2..6719981b5dff1 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -74,6 +74,26 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
   return s;
 }
 
+Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
+                               uint32_t align) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(name);
+
+  if (!wasInserted) {
+    if (auto *common = dyn_cast<CommonSymbol>(s)) {
+      if (size < common->size)
+        return s;
+    } else if (!isa<Undefined>(s)) {
+      error("TODO: implement common symbol resolution with other symbol kinds");
+      return s;
+    }
+  }
+
+  replaceSymbol<CommonSymbol>(s, name, file, size, align);
+  return s;
+}
+
 Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
                               bool isTlv) {
   Symbol *s;

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 178b26f485996..b4c7ec8604ff7 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -19,6 +19,7 @@ namespace macho {
 
 class ArchiveFile;
 class DylibFile;
+class InputFile;
 class InputSection;
 class MachHeaderSection;
 class Symbol;
@@ -36,6 +37,8 @@ class SymbolTable {
 
   Symbol *addUndefined(StringRef name);
 
+  Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align);
+
   Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv);
 
   Symbol *addLazy(StringRef name, ArchiveFile *file,

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 33ba00860ef33..8e1b9c62e92e9 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -14,6 +14,7 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Strings.h"
 #include "llvm/Object/Archive.h"
+#include "llvm/Support/MathExtras.h"
 
 namespace lld {
 namespace macho {
@@ -36,6 +37,7 @@ class Symbol {
   enum Kind {
     DefinedKind,
     UndefinedKind,
+    CommonKind,
     DylibKind,
     LazyKind,
     DSOHandleKind,
@@ -115,6 +117,35 @@ class Undefined : public Symbol {
   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
 };
 
+// On Unix, it is traditionally allowed to write variable definitions without
+// initialization expressions (such as "int foo;") to header files. These are
+// called tentative definitions.
+//
+// Using tentative definitions is usually considered a bad practice; you should
+// write only declarations (such as "extern int foo;") to header files.
+// Nevertheless, the linker and the compiler have to do something to support
+// bad code by allowing duplicate definitions for this particular case.
+//
+// The compiler creates common symbols when it sees tentative definitions.
+// (You can suppress this behavior and let the compiler create a regular
+// defined symbol by passing -fno-common.) When linking the final binary, if
+// there are remaining common symbols after name resolution is complete, the
+// linker converts them to regular defined symbols in a __common section.
+class CommonSymbol : public Symbol {
+public:
+  CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align)
+      : Symbol(CommonKind, name), file(file), size(size),
+        align(align != 1 ? align : llvm::PowerOf2Ceil(size)) {
+    // TODO: cap maximum alignment
+  }
+
+  static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
+
+  InputFile *const file;
+  const uint64_t size;
+  const uint32_t align;
+};
+
 class DylibSymbol : public Symbol {
 public:
   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
@@ -183,8 +214,10 @@ class DSOHandle : public Symbol {
 union SymbolUnion {
   alignas(Defined) char a[sizeof(Defined)];
   alignas(Undefined) char b[sizeof(Undefined)];
-  alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
-  alignas(LazySymbol) char d[sizeof(LazySymbol)];
+  alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
+  alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
+  alignas(LazySymbol) char e[sizeof(LazySymbol)];
+  alignas(DSOHandle) char f[sizeof(DSOHandle)];
 };
 
 template <typename T, typename... ArgT>

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index b7571e473bd27..03bc216e206dc 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -26,6 +26,7 @@ namespace macho {
 namespace section_names {
 
 constexpr const char pageZero[] = "__pagezero";
+constexpr const char common[] = "__common";
 constexpr const char header[] = "__mach_header";
 constexpr const char binding[] = "__binding";
 constexpr const char weakBinding[] = "__weak_binding";

diff  --git a/lld/test/MachO/common-symbol-coalescing.s b/lld/test/MachO/common-symbol-coalescing.s
new file mode 100644
index 0000000000000..88cec5615cf1e
--- /dev/null
+++ b/lld/test/MachO/common-symbol-coalescing.s
@@ -0,0 +1,83 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/same-size.s -o %t/same-size.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/smaller-size.s -o %t/smaller-size.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align.s -o %t/zero-align.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align-round-up.s -o %t/zero-align-round-up.o
+
+## Check that we pick the definition with the larger size, regardless of
+## its alignment.
+# RUN: lld -flavor darwinnew %t/test.o %t/smaller-size.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/smaller-size.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+
+## When the sizes are equal, we pick the symbol whose file occurs later in the
+## command-line argument list.
+# RUN: lld -flavor darwinnew %t/test.o %t/same-size.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/same-size.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+
+# RUN: lld -flavor darwinnew %t/test.o %t/zero-align.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/zero-align.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+
+# RUN: lld -flavor darwinnew %t/test.o %t/zero-align-round-up.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/zero-align-round-up.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+
+# SMALLER-ALIGNMENT-LABEL: Sections:
+# SMALLER-ALIGNMENT:       __common      {{[0-9a-f]+}} [[#%x, COMMON_START:]]  BSS
+
+# SMALLER-ALIGNMENT-LABEL: SYMBOL TABLE:
+# SMALLER-ALIGNMENT-DAG:   [[#COMMON_START]]      g     O __DATA,__common _check_size
+# SMALLER-ALIGNMENT-DAG:   [[#COMMON_START + 2]]  g     O __DATA,__common _end_marker
+# SMALLER-ALIGNMENT-DAG:   [[#COMMON_START + 8]]  g     O __DATA,__common _check_alignment
+
+# LARGER-ALIGNMENT-LABEL: Sections:
+# LARGER-ALIGNMENT:       __common      {{[0-9a-f]+}} [[#%x, COMMON_START:]]  BSS
+
+# LARGER-ALIGNMENT-LABEL: SYMBOL TABLE:
+# LARGER-ALIGNMENT-DAG:   [[#COMMON_START]]      g     O __DATA,__common _check_size
+# LARGER-ALIGNMENT-DAG:   [[#COMMON_START + 2]]  g     O __DATA,__common _end_marker
+# LARGER-ALIGNMENT-DAG:   [[#COMMON_START + 16]] g     O __DATA,__common _check_alignment
+
+#--- order
+## Order is important as we determine the size of a given symbol via the
+## address of the next symbol.
+_check_size
+_end_marker
+_check_alignment
+
+#--- smaller-size.s
+.comm _check_size, 1, 1
+.comm _check_alignment, 1, 4
+
+#--- same-size.s
+.comm _check_size, 2, 1
+.comm _check_alignment, 2, 4
+
+#--- zero-align.s
+.comm _check_size, 2, 1
+## If alignment is set to zero, use the size to determine the alignment.
+.comm _check_alignment, 16, 0
+
+#--- zero-align-round-up.s
+.comm _check_size, 2, 1
+## If alignment is set to zero, use the size to determine the alignment. If the
+## size is not a power of two, round it up. (In this case, 14 rounds to 16.)
+.comm _check_alignment, 14, 0
+
+#--- test.s
+.comm _check_size, 2, 1
+.comm _end_marker, 1
+.comm _check_alignment, 2, 3
+
+.globl _main
+_main:
+  ret


        


More information about the llvm-commits mailing list