[lld] 8c45e80 - [lld/mac] Port typo correction for undefined symbols from ELF port

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 3 15:54:04 PDT 2022


Author: Nico Weber
Date: 2022-10-03T18:53:55-04:00
New Revision: 8c45e80298f4e3eb6d9cfbafcb099bc087e4668e

URL: https://github.com/llvm/llvm-project/commit/8c45e80298f4e3eb6d9cfbafcb099bc087e4668e
DIFF: https://github.com/llvm/llvm-project/commit/8c45e80298f4e3eb6d9cfbafcb099bc087e4668e.diff

LOG: [lld/mac] Port typo correction for undefined symbols from ELF port

Ports:
- core feature: https://reviews.llvm.org/D67039
- case mismatch: https://reviews.llvm.org/D70506
- extern "C" suggestions: https://reviews.llvm.org/D69592,
  https://reviews.llvm.org/D69650

Does not port https://reviews.llvm.org/D71735 since I believe that that doesn't
apply to lld/Mach-O.

Differential Revision: https://reviews.llvm.org/D135038

Added: 
    lld/test/MachO/undef-spell-corrector.s
    lld/test/MachO/undef-suggest-extern-c.s
    lld/test/MachO/undef-suggest-extern-c2.s

Modified: 
    lld/MachO/SymbolTable.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 5173fa069e22b..ccb1d51bdbe42 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -392,8 +392,138 @@ void macho::reportPendingDuplicateSymbols() {
   }
 }
 
+// Check whether the definition name def is a mangled function name that matches
+// the reference name ref.
+static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
+  llvm::ItaniumPartialDemangler d;
+  std::string name = def.str();
+  if (d.partialDemangle(name.c_str()))
+    return false;
+  char *buf = d.getFunctionName(nullptr, nullptr);
+  if (!buf)
+    return false;
+  bool ret = ref == buf;
+  free(buf);
+  return ret;
+}
+
+// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
+// the suggested symbol, which is either in the symbol table, or in the same
+// file of sym.
+static const Symbol *getAlternativeSpelling(const Undefined &sym,
+                                            std::string &pre_hint,
+                                            std::string &post_hint) {
+  DenseMap<StringRef, const Symbol *> map;
+  if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
+    // Build a map of local defined symbols.
+    for (const Symbol *s : sym.getFile()->symbols)
+      if (auto *defined = dyn_cast<Defined>(s))
+        if (!defined->isExternal())
+          map.try_emplace(s->getName(), s);
+  }
+
+  auto suggest = [&](StringRef newName) -> const Symbol * {
+    // If defined locally.
+    if (const Symbol *s = map.lookup(newName))
+      return s;
+
+    // If in the symbol table and not undefined.
+    if (const Symbol *s = symtab->find(newName))
+      if (dyn_cast<Undefined>(s) == nullptr)
+        return s;
+
+    return nullptr;
+  };
+
+  // This loop enumerates all strings of Levenshtein distance 1 as typo
+  // correction candidates and suggests the one that exists as a non-undefined
+  // symbol.
+  StringRef name = sym.getName();
+  for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
+    // Insert a character before name[i].
+    std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
+    for (char c = '0'; c <= 'z'; ++c) {
+      newName[i] = c;
+      if (const Symbol *s = suggest(newName))
+        return s;
+    }
+    if (i == e)
+      break;
+
+    // Substitute name[i].
+    newName = std::string(name);
+    for (char c = '0'; c <= 'z'; ++c) {
+      newName[i] = c;
+      if (const Symbol *s = suggest(newName))
+        return s;
+    }
+
+    // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
+    // common.
+    if (i + 1 < e) {
+      newName[i] = name[i + 1];
+      newName[i + 1] = name[i];
+      if (const Symbol *s = suggest(newName))
+        return s;
+    }
+
+    // Delete name[i].
+    newName = (name.substr(0, i) + name.substr(i + 1)).str();
+    if (const Symbol *s = suggest(newName))
+      return s;
+  }
+
+  // Case mismatch, e.g. Foo vs FOO.
+  for (auto &it : map)
+    if (name.equals_insensitive(it.first))
+      return it.second;
+  for (Symbol *sym : symtab->getSymbols())
+    if (dyn_cast<Undefined>(sym) == nullptr &&
+        name.equals_insensitive(sym->getName()))
+      return sym;
+
+  // The reference may be a mangled name while the definition is not. Suggest a
+  // missing extern "C".
+  if (name.startswith("__Z")) {
+    std::string buf = name.str();
+    llvm::ItaniumPartialDemangler d;
+    if (!d.partialDemangle(buf.c_str()))
+      if (char *buf = d.getFunctionName(nullptr, nullptr)) {
+        const Symbol *s = suggest((Twine("_") + buf).str());
+        free(buf);
+        if (s) {
+          pre_hint = ": extern \"C\" ";
+          return s;
+        }
+      }
+  } else {
+    StringRef name_without_underscore = name;
+    name_without_underscore.consume_front("_");
+    const Symbol *s = nullptr;
+    for (auto &it : map)
+      if (canSuggestExternCForCXX(name_without_underscore, it.first)) {
+        s = it.second;
+        break;
+      }
+    if (!s)
+      for (Symbol *sym : symtab->getSymbols())
+        if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) {
+          s = sym;
+          break;
+        }
+    if (s) {
+      pre_hint = " to declare ";
+      post_hint = " as extern \"C\"?";
+      return s;
+    }
+  }
+
+  return nullptr;
+}
+
 static void reportUndefinedSymbol(const Undefined &sym,
-                                  const UndefinedDiag &locations) {
+                                  const UndefinedDiag &locations,
+                                  bool correctSpelling) {
   std::string message = "undefined symbol";
   if (config->archMultiple)
     message += (" for arch " + getArchitectureName(config->arch())).str();
@@ -426,6 +556,17 @@ static void reportUndefinedSymbol(const Undefined &sym,
         ("\n>>> referenced " + Twine(totalReferences - i) + " more times")
             .str();
 
+  if (correctSpelling) {
+    std::string pre_hint = ": ", post_hint;
+    if (const Symbol *corrected =
+            getAlternativeSpelling(sym, pre_hint, post_hint)) {
+      message +=
+          "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint;
+      if (corrected->getFile())
+        message += "\n>>> defined in: " + toString(corrected->getFile());
+    }
+  }
+
   if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
     error(message);
   else if (config->undefinedSymbolTreatment ==
@@ -436,8 +577,9 @@ static void reportUndefinedSymbol(const Undefined &sym,
 }
 
 void macho::reportPendingUndefinedSymbols() {
-  for (const auto &undef : undefs)
-    reportUndefinedSymbol(*undef.first, undef.second);
+  // Enable spell corrector for the first 2 diagnostics.
+  for (const auto &[i, undef] : llvm::enumerate(undefs))
+    reportUndefinedSymbol(*undef.first, undef.second, i < 2);
 
   // This function is called multiple times during execution. Clear the printed
   // diagnostics to avoid printing the same things again the next time.

diff  --git a/lld/test/MachO/undef-spell-corrector.s b/lld/test/MachO/undef-spell-corrector.s
new file mode 100644
index 0000000000000..cfec062bfb532
--- /dev/null
+++ b/lld/test/MachO/undef-spell-corrector.s
@@ -0,0 +1,79 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
+
+## Insert a character.
+## The spell corrector is enabled for the first two "undefined symbol" diagnostics.
+# RUN: echo 'call bcde; call abcd; call abde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.o
+
+## Symbols defined in DSO can be suggested.
+# RUN: %lld %t.o -dylib -o %t.dylib
+# RUN: not %lld %t.dylib %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.dylib
+
+# INSERT:      error: undefined symbol: abde
+# INSERT-NEXT: >>> referenced by {{.*}}
+# INSERT-NEXT: >>> did you mean: abcde
+# INSERT-NEXT: >>> defined in: [[FILE]]
+# INSERT:      error: undefined symbol: abcd
+# INSERT-NEXT: >>> referenced by {{.*}}
+# INSERT-NEXT: >>> did you mean: abcde
+# INSERT-NEXT: >>> defined in: [[FILE]]
+# INSERT:      error: undefined symbol: bcde
+# INSERT-NEXT: >>> referenced by {{.*}}
+# INSERT-NOT:  >>>
+
+## Substitute a character.
+# RUN: echo 'call bbcde; call abcdd' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=SUBST %s
+
+# SUBST:      error: undefined symbol: abcdd
+# SUBST-NEXT: >>> referenced by {{.*}}
+# SUBST-NEXT: >>> did you mean: abcde
+# SUBST:      error: undefined symbol: bbcde
+# SUBST-NEXT: >>> referenced by {{.*}}
+# SUBST-NEXT: >>> did you mean: abcde
+
+## Delete a character.
+# RUN: echo 'call aabcde; call abcdee' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DELETE %s
+
+# DELETE:      error: undefined symbol: abcdee
+# DELETE-NEXT: >>> referenced by {{.*}}
+# DELETE-NEXT: >>> did you mean: abcde
+# DELETE:      error: undefined symbol: aabcde
+# DELETE-NEXT: >>> referenced by {{.*}}
+# DELETE-NEXT: >>> did you mean: abcde
+
+## Transpose.
+# RUN: echo 'call bacde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=TRANSPOSE %s
+
+# TRANSPOSE:      error: undefined symbol: bacde
+# TRANSPOSE-NEXT: >>> referenced by {{.*}}
+# TRANSPOSE-NEXT: >>> did you mean: abcde
+
+## Missing const qualifier.
+# RUN: echo 'call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
+## Local defined symbols.
+# RUN: echo '__Z3fooPKi: call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
+
+# CONST:      error: undefined symbol: foo(int*)
+# CONST-NEXT: >>> referenced by {{.*}}
+# CONST-NEXT: >>> did you mean: foo(int const*)
+
+## Case mismatch.
+# RUN: echo 'call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
+# RUN: echo '__Z3fooPKi: call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
+
+# CASE:      error: undefined symbol: FOO(int const*)
+# CASE-NEXT: >>> referenced by {{.*}}
+# CASE-NEXT: >>> did you mean: foo(int const*)
+
+.globl _main, abcde, __Z3fooPKi
+_main:
+abcde:
+__Z3fooPKi:

diff  --git a/lld/test/MachO/undef-suggest-extern-c.s b/lld/test/MachO/undef-suggest-extern-c.s
new file mode 100644
index 0000000000000..10bca0fbc1682
--- /dev/null
+++ b/lld/test/MachO/undef-suggest-extern-c.s
@@ -0,0 +1,19 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
+
+## The reference is mangled while the definition is not, suggest a missing
+## extern "C".
+# RUN: echo 'call __Z3fooi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK:      error: undefined symbol: foo(int)
+# CHECK-NEXT: >>> referenced by {{.*}}
+# CHECK-NEXT: >>> did you mean: extern "C" _foo
+
+## Don't suggest for nested names like F::foo() and foo::foo().
+# RUN: echo 'call __ZN1F3fooEv; call __ZN3fooC1Ev' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
+# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'
+
+.globl _start, _foo
+_start:
+_foo:

diff  --git a/lld/test/MachO/undef-suggest-extern-c2.s b/lld/test/MachO/undef-suggest-extern-c2.s
new file mode 100644
index 0000000000000..afdfbcfa415f6
--- /dev/null
+++ b/lld/test/MachO/undef-suggest-extern-c2.s
@@ -0,0 +1,21 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
+
+## The definition is mangled while the reference is not, suggest an arbitrary
+## C++ overload.
+# RUN: echo '.globl __Z3fooi; __Z3fooi:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s
+
+## Check that we can suggest a local definition.
+# RUN: echo '__Z3fooi: call _foo' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
+# RUN: not %lld %t2.o -demangle -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK:      error: undefined symbol: _foo
+# CHECK-NEXT: >>> referenced by {{.*}}
+# CHECK-NEXT: >>> did you mean to declare foo(int) as extern "C"?
+
+## Don't suggest nested names whose base name is "foo", e.g. F::foo().
+# RUN: echo '.globl __ZN1F3fooEv; __ZN1F3fooEv:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t3.o
+# RUN: not %lld %t.o %t3.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'
+
+call _foo


        


More information about the llvm-commits mailing list