[llvm-branch-commits] [lld] 5f9896d - [lld-macho] Support Obj-C symbols in order files

Jez Ng via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Dec 20 10:53:43 PST 2020


Author: Jez Ng
Date: 2020-12-20T13:49:18-05:00
New Revision: 5f9896d3b23c31beecf225d90194e1bf4e097677

URL: https://github.com/llvm/llvm-project/commit/5f9896d3b23c31beecf225d90194e1bf4e097677
DIFF: https://github.com/llvm/llvm-project/commit/5f9896d3b23c31beecf225d90194e1bf4e097677.diff

LOG: [lld-macho] Support Obj-C symbols in order files

Obj-C symbols may have spaces and colons, which our previous order file
parser would be confused by. The order file format has made the very unfortunate
choice of using colons for its delimiters, which means that we have to use
heuristics to determine if a given colon is part of a symbol or not...

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D93567

Added: 
    

Modified: 
    lld/MachO/Driver.cpp
    lld/test/MachO/order-file.s

Removed: 
    lld/test/MachO/invalid/order-file-bad-arch.test
    lld/test/MachO/invalid/order-file-bad-objfile.test


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 63d101270cf5..9838e67cd4a2 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -393,19 +393,15 @@ static void addFileList(StringRef path) {
     addFile(path, false);
 }
 
-static std::array<StringRef, 6> archNames{"arm",    "arm64", "i386",
-                                          "x86_64", "ppc",   "ppc64"};
-static bool isArchString(StringRef s) {
-  static DenseSet<StringRef> archNamesSet(archNames.begin(), archNames.end());
-  return archNamesSet.find(s) != archNamesSet.end();
-}
-
 // An order file has one entry per line, in the following format:
 //
-//   <arch>:<object file>:<symbol name>
+//   <cpu>:<object file>:<symbol name>
 //
-// <arch> and <object file> are optional. If not specified, then that entry
-// matches any symbol of that name.
+// <cpu> and <object file> are optional. If not specified, then that entry
+// matches any symbol of that name. Parsing this format is not quite
+// straightforward because the symbol name itself can contain colons, so when
+// encountering a colon, we consider the preceding characters to decide if it
+// can be a valid CPU type or file path.
 //
 // If a symbol is matched by multiple entries, then it takes the lowest-ordered
 // entry (the one nearest to the front of the list.)
@@ -420,59 +416,37 @@ static void parseOrderFile(StringRef path) {
 
   MemoryBufferRef mbref = *buffer;
   size_t priority = std::numeric_limits<size_t>::max();
-  for (StringRef rest : args::getLines(mbref)) {
-    StringRef arch, objectFile, symbol;
-
-    std::array<StringRef, 3> fields;
-    uint8_t fieldCount = 0;
-    while (rest != "" && fieldCount < 3) {
-      std::pair<StringRef, StringRef> p = getToken(rest, ": \t\n\v\f\r");
-      StringRef tok = p.first;
-      rest = p.second;
-
-      // Check if we have a comment
-      if (tok == "" || tok[0] == '#')
-        break;
-
-      fields[fieldCount++] = tok;
-    }
-
-    switch (fieldCount) {
-    case 3:
-      arch = fields[0];
-      objectFile = fields[1];
-      symbol = fields[2];
-      break;
-    case 2:
-      (isArchString(fields[0]) ? arch : objectFile) = fields[0];
-      symbol = fields[1];
-      break;
-    case 1:
-      symbol = fields[0];
-      break;
-    case 0:
-      break;
-    default:
-      llvm_unreachable("too many fields in order file");
-    }
+  for (StringRef line : args::getLines(mbref)) {
+    StringRef objectFile, symbol;
+    line = line.take_until([](char c) { return c == '#'; }); // ignore comments
+    line = line.ltrim();
+
+    CPUType cpuType = StringSwitch<CPUType>(line)
+                          .StartsWith("i386:", CPU_TYPE_I386)
+                          .StartsWith("x86_64:", CPU_TYPE_X86_64)
+                          .StartsWith("arm:", CPU_TYPE_ARM)
+                          .StartsWith("arm64:", CPU_TYPE_ARM64)
+                          .StartsWith("ppc:", CPU_TYPE_POWERPC)
+                          .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
+                          .Default(CPU_TYPE_ANY);
+    // Drop the CPU type as well as the colon
+    if (cpuType != CPU_TYPE_ANY)
+      line = line.drop_until([](char c) { return c == ':'; }).drop_front();
+    // TODO: Update when we extend support for other CPUs
+    if (cpuType != CPU_TYPE_ANY && cpuType != CPU_TYPE_X86_64)
+      continue;
 
-    if (!arch.empty()) {
-      if (!isArchString(arch)) {
-        error("invalid arch \"" + arch + "\" in order file: expected one of " +
-              llvm::join(archNames, ", "));
-        continue;
+    constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"};
+    for (StringRef fileEnd : fileEnds) {
+      size_t pos = line.find(fileEnd);
+      if (pos != StringRef::npos) {
+        // Split the string around the colon
+        objectFile = line.take_front(pos + fileEnd.size() - 1);
+        line = line.drop_front(pos + fileEnd.size());
+        break;
       }
-
-      // TODO: Update when we extend support for other archs
-      if (arch != "x86_64")
-        continue;
-    }
-
-    if (!objectFile.empty() && !objectFile.endswith(".o")) {
-      error("invalid object file name \"" + objectFile +
-            "\" in order file: should end with .o");
-      continue;
     }
+    symbol = line.trim();
 
     if (!symbol.empty()) {
       SymbolPriorityEntry &entry = config->priorities[symbol];

diff  --git a/lld/test/MachO/invalid/order-file-bad-arch.test b/lld/test/MachO/invalid/order-file-bad-arch.test
deleted file mode 100644
index 06b37b017f0b..000000000000
--- a/lld/test/MachO/invalid/order-file-bad-arch.test
+++ /dev/null
@@ -1,9 +0,0 @@
-# REQUIRES: x86
-# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o
-# RUN: not %lld -o %t %t.o -order_file %s 2>&1 | FileCheck %s
-# CHECK: error: invalid arch "sparc" in order file:  expected one of arm, arm64, i386, x86_64, ppc, ppc64
-# CHECK-EMPTY:
-
-_barsymbol
-sparc:hello.o:_foosymbol
-i386:hello.o:_foosymbol

diff  --git a/lld/test/MachO/invalid/order-file-bad-objfile.test b/lld/test/MachO/invalid/order-file-bad-objfile.test
deleted file mode 100644
index 546e68832f13..000000000000
--- a/lld/test/MachO/invalid/order-file-bad-objfile.test
+++ /dev/null
@@ -1,10 +0,0 @@
-# REQUIRES: x86
-# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o
-# RUN: not %lld -o %t %t.o -order_file %s 2>&1 | FileCheck %s
-# CHECK: invalid object file name "helloo" in order file: should end with .o
-# CHECK: invalid object file name "z80" in order file: should end with .o
-# CHECK-EMPTY:
-
-_barsymbol
-x86_64:helloo:_foosymbol
-z80:_foosymbol

diff  --git a/lld/test/MachO/order-file.s b/lld/test/MachO/order-file.s
index 673343935d3f..a13abe73efb9 100644
--- a/lld/test/MachO/order-file.s
+++ b/lld/test/MachO/order-file.s
@@ -5,11 +5,11 @@
 # RUN: rm -f %t/foo.a
 # RUN: llvm-ar rcs %t/foo.a %t/foo.o
 
-# FOO-FIRST: <_foo>:
+# FOO-FIRST: <_bar>:
 # FOO-FIRST: <_main>:
 
 # FOO-SECOND: <_main>:
-# FOO-SECOND: <_foo>:
+# FOO-SECOND: <_bar>:
 
 # RUN: %lld -lSystem -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-1
 # RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
@@ -83,9 +83,9 @@
 # RUN: %lld -lSystem -o %t/test-4 %t/foo.o %t/test.o -order_file %t/ord-multiple-4
 # RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST
 
-## _foo and _bar both point to the same location. When both symbols appear in
-## an order file, the location in question should be ordered according to the
-## lowest-ordered symbol that references it.
+## -[Foo doFoo:andBar:] and _bar both point to the same location. When both
+## symbols appear in an order file, the location in question should be ordered
+## according to the lowest-ordered symbol that references it.
 
 # RUN: %lld -lSystem -o %t/test-alias %t/test.o %t/foo.o -order_file %t/ord-alias
 # RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST
@@ -93,63 +93,63 @@
 # RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST
 
 #--- ord-1
-_foo # just a comment
+-[Foo doFoo:andBar:] # just a comment
 _main # another comment
 
 #--- ord-2
 _main # just a comment
-_foo # another comment
+-[Foo doFoo:andBar:] # another comment
 
 #--- ord-file-match
-foo.o:_foo
+foo.o:-[Foo doFoo:andBar:]
 _main
 
 #--- ord-file-nomatch
-bar.o:_foo
+bar.o:-[Foo doFoo:andBar:]
 _main
-_foo
+-[Foo doFoo:andBar:]
 
 #--- ord-arch-match
-x86_64:_foo
+x86_64:-[Foo doFoo:andBar:]
 _main
 
 #--- ord-arch-nomatch
-ppc:_foo
+ppc:-[Foo doFoo:andBar:]
 _main
-_foo
+-[Foo doFoo:andBar:]
 
 #--- ord-arch-file-match
-x86_64:bar.o:_foo
+x86_64:bar.o:-[Foo doFoo:andBar:]
 _main
 
 #--- ord-multiple-1
-_foo
+-[Foo doFoo:andBar:]
 _main
-foo.o:_foo
+foo.o:-[Foo doFoo:andBar:]
 
 #--- ord-multiple-2
-foo.o:_foo
+foo.o:-[Foo doFoo:andBar:]
 _main
-_foo
+-[Foo doFoo:andBar:]
 
 #--- ord-multiple-3
-_foo
+-[Foo doFoo:andBar:]
 _main
-_foo
+-[Foo doFoo:andBar:]
 
 #--- ord-multiple-4
-foo.o:_foo
+foo.o:-[Foo doFoo:andBar:]
 _main
-foo.o:_foo
+foo.o:-[Foo doFoo:andBar:]
 
 #--- ord-alias
 _bar
 _main
-_foo
+-[Foo doFoo:andBar:]
 
 #--- foo.s
-.globl _foo
-_foo:
+.globl "-[Foo doFoo:andBar:]"
+"-[Foo doFoo:andBar:]":
 _bar:
   ret
 
@@ -157,5 +157,5 @@ _bar:
 .globl _main
 
 _main:
-  callq _foo
+  callq "-[Foo doFoo:andBar:]"
   ret


        


More information about the llvm-branch-commits mailing list