[lld] 82ca390 - Reland "[lld/mac] Port typo correction for undefined symbols from ELF port"
Nico Weber via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 13 07:34:21 PDT 2022
Author: Nico Weber
Date: 2022-10-13T10:33:47-04:00
New Revision: 82ca390062d11512528e3d357d8d3d7b69477caf
URL: https://github.com/llvm/llvm-project/commit/82ca390062d11512528e3d357d8d3d7b69477caf
DIFF: https://github.com/llvm/llvm-project/commit/82ca390062d11512528e3d357d8d3d7b69477caf.diff
LOG: Reland "[lld/mac] Port typo correction for undefined symbols from ELF port"
The only difference in the reland is that the loop at the top of
getAlternativeSpelling() now calls dyn_cast_or_null() instead
of dyn_cast() -- a file's symbols list can contain null entries.
The test for this might be slightly involved, so I'll land it in
a follow-up, to make the reland similar to the original commit.
Originally reviewed at:
Differential Revision: https://reviews.llvm.org/D135038
This reverts commit 317b5582b813c51d1fb6723fd44b227b7f274bc7.
Added:
lld/test/MachO/undef-spell-corrector.s
lld/test/MachO/undef-suggest-extern-c.s
lld/test/MachO/undef-suggest-extern-c2.s
Modified:
lld/MachO/SymbolTable.cpp
Removed:
################################################################################
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 5173fa069e22b..8ad7dbb8a89cf 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -392,8 +392,138 @@ void macho::reportPendingDuplicateSymbols() {
}
}
+// Check whether the definition name def is a mangled function name that matches
+// the reference name ref.
+static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
+ llvm::ItaniumPartialDemangler d;
+ std::string name = def.str();
+ if (d.partialDemangle(name.c_str()))
+ return false;
+ char *buf = d.getFunctionName(nullptr, nullptr);
+ if (!buf)
+ return false;
+ bool ret = ref == buf;
+ free(buf);
+ return ret;
+}
+
+// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
+// the suggested symbol, which is either in the symbol table, or in the same
+// file of sym.
+static const Symbol *getAlternativeSpelling(const Undefined &sym,
+ std::string &pre_hint,
+ std::string &post_hint) {
+ DenseMap<StringRef, const Symbol *> map;
+ if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
+ // Build a map of local defined symbols.
+ for (const Symbol *s : sym.getFile()->symbols)
+ if (auto *defined = dyn_cast_or_null<Defined>(s))
+ if (!defined->isExternal())
+ map.try_emplace(s->getName(), s);
+ }
+
+ auto suggest = [&](StringRef newName) -> const Symbol * {
+ // If defined locally.
+ if (const Symbol *s = map.lookup(newName))
+ return s;
+
+ // If in the symbol table and not undefined.
+ if (const Symbol *s = symtab->find(newName))
+ if (dyn_cast<Undefined>(s) == nullptr)
+ return s;
+
+ return nullptr;
+ };
+
+ // This loop enumerates all strings of Levenshtein distance 1 as typo
+ // correction candidates and suggests the one that exists as a non-undefined
+ // symbol.
+ StringRef name = sym.getName();
+ for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
+ // Insert a character before name[i].
+ std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
+ for (char c = '0'; c <= 'z'; ++c) {
+ newName[i] = c;
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+ if (i == e)
+ break;
+
+ // Substitute name[i].
+ newName = std::string(name);
+ for (char c = '0'; c <= 'z'; ++c) {
+ newName[i] = c;
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+
+ // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
+ // common.
+ if (i + 1 < e) {
+ newName[i] = name[i + 1];
+ newName[i + 1] = name[i];
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+
+ // Delete name[i].
+ newName = (name.substr(0, i) + name.substr(i + 1)).str();
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+
+ // Case mismatch, e.g. Foo vs FOO.
+ for (auto &it : map)
+ if (name.equals_insensitive(it.first))
+ return it.second;
+ for (Symbol *sym : symtab->getSymbols())
+ if (dyn_cast<Undefined>(sym) == nullptr &&
+ name.equals_insensitive(sym->getName()))
+ return sym;
+
+ // The reference may be a mangled name while the definition is not. Suggest a
+ // missing extern "C".
+ if (name.startswith("__Z")) {
+ std::string buf = name.str();
+ llvm::ItaniumPartialDemangler d;
+ if (!d.partialDemangle(buf.c_str()))
+ if (char *buf = d.getFunctionName(nullptr, nullptr)) {
+ const Symbol *s = suggest((Twine("_") + buf).str());
+ free(buf);
+ if (s) {
+ pre_hint = ": extern \"C\" ";
+ return s;
+ }
+ }
+ } else {
+ StringRef name_without_underscore = name;
+ name_without_underscore.consume_front("_");
+ const Symbol *s = nullptr;
+ for (auto &it : map)
+ if (canSuggestExternCForCXX(name_without_underscore, it.first)) {
+ s = it.second;
+ break;
+ }
+ if (!s)
+ for (Symbol *sym : symtab->getSymbols())
+ if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) {
+ s = sym;
+ break;
+ }
+ if (s) {
+ pre_hint = " to declare ";
+ post_hint = " as extern \"C\"?";
+ return s;
+ }
+ }
+
+ return nullptr;
+}
+
static void reportUndefinedSymbol(const Undefined &sym,
- const UndefinedDiag &locations) {
+ const UndefinedDiag &locations,
+ bool correctSpelling) {
std::string message = "undefined symbol";
if (config->archMultiple)
message += (" for arch " + getArchitectureName(config->arch())).str();
@@ -426,6 +556,17 @@ static void reportUndefinedSymbol(const Undefined &sym,
("\n>>> referenced " + Twine(totalReferences - i) + " more times")
.str();
+ if (correctSpelling) {
+ std::string pre_hint = ": ", post_hint;
+ if (const Symbol *corrected =
+ getAlternativeSpelling(sym, pre_hint, post_hint)) {
+ message +=
+ "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint;
+ if (corrected->getFile())
+ message += "\n>>> defined in: " + toString(corrected->getFile());
+ }
+ }
+
if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
error(message);
else if (config->undefinedSymbolTreatment ==
@@ -436,8 +577,9 @@ static void reportUndefinedSymbol(const Undefined &sym,
}
void macho::reportPendingUndefinedSymbols() {
- for (const auto &undef : undefs)
- reportUndefinedSymbol(*undef.first, undef.second);
+ // Enable spell corrector for the first 2 diagnostics.
+ for (const auto &[i, undef] : llvm::enumerate(undefs))
+ reportUndefinedSymbol(*undef.first, undef.second, i < 2);
// This function is called multiple times during execution. Clear the printed
// diagnostics to avoid printing the same things again the next time.
diff --git a/lld/test/MachO/undef-spell-corrector.s b/lld/test/MachO/undef-spell-corrector.s
new file mode 100644
index 0000000000000..cfec062bfb532
--- /dev/null
+++ b/lld/test/MachO/undef-spell-corrector.s
@@ -0,0 +1,79 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
+
+## Insert a character.
+## The spell corrector is enabled for the first two "undefined symbol" diagnostics.
+# RUN: echo 'call bcde; call abcd; call abde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.o
+
+## Symbols defined in DSO can be suggested.
+# RUN: %lld %t.o -dylib -o %t.dylib
+# RUN: not %lld %t.dylib %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.dylib
+
+# INSERT: error: undefined symbol: abde
+# INSERT-NEXT: >>> referenced by {{.*}}
+# INSERT-NEXT: >>> did you mean: abcde
+# INSERT-NEXT: >>> defined in: [[FILE]]
+# INSERT: error: undefined symbol: abcd
+# INSERT-NEXT: >>> referenced by {{.*}}
+# INSERT-NEXT: >>> did you mean: abcde
+# INSERT-NEXT: >>> defined in: [[FILE]]
+# INSERT: error: undefined symbol: bcde
+# INSERT-NEXT: >>> referenced by {{.*}}
+# INSERT-NOT: >>>
+
+## Substitute a character.
+# RUN: echo 'call bbcde; call abcdd' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=SUBST %s
+
+# SUBST: error: undefined symbol: abcdd
+# SUBST-NEXT: >>> referenced by {{.*}}
+# SUBST-NEXT: >>> did you mean: abcde
+# SUBST: error: undefined symbol: bbcde
+# SUBST-NEXT: >>> referenced by {{.*}}
+# SUBST-NEXT: >>> did you mean: abcde
+
+## Delete a character.
+# RUN: echo 'call aabcde; call abcdee' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DELETE %s
+
+# DELETE: error: undefined symbol: abcdee
+# DELETE-NEXT: >>> referenced by {{.*}}
+# DELETE-NEXT: >>> did you mean: abcde
+# DELETE: error: undefined symbol: aabcde
+# DELETE-NEXT: >>> referenced by {{.*}}
+# DELETE-NEXT: >>> did you mean: abcde
+
+## Transpose.
+# RUN: echo 'call bacde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=TRANSPOSE %s
+
+# TRANSPOSE: error: undefined symbol: bacde
+# TRANSPOSE-NEXT: >>> referenced by {{.*}}
+# TRANSPOSE-NEXT: >>> did you mean: abcde
+
+## Missing const qualifier.
+# RUN: echo 'call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
+## Local defined symbols.
+# RUN: echo '__Z3fooPKi: call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
+
+# CONST: error: undefined symbol: foo(int*)
+# CONST-NEXT: >>> referenced by {{.*}}
+# CONST-NEXT: >>> did you mean: foo(int const*)
+
+## Case mismatch.
+# RUN: echo 'call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
+# RUN: echo '__Z3fooPKi: call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
+
+# CASE: error: undefined symbol: FOO(int const*)
+# CASE-NEXT: >>> referenced by {{.*}}
+# CASE-NEXT: >>> did you mean: foo(int const*)
+
+.globl _main, abcde, __Z3fooPKi
+_main:
+abcde:
+__Z3fooPKi:
diff --git a/lld/test/MachO/undef-suggest-extern-c.s b/lld/test/MachO/undef-suggest-extern-c.s
new file mode 100644
index 0000000000000..8c53c8e602cad
--- /dev/null
+++ b/lld/test/MachO/undef-suggest-extern-c.s
@@ -0,0 +1,19 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
+
+## The reference is mangled while the definition is not, suggest a missing
+## extern "C".
+# RUN: echo 'call __Z3fooi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: undefined symbol: foo(int)
+# CHECK-NEXT: >>> referenced by {{.*}}
+# CHECK-NEXT: >>> did you mean: extern "C" foo
+
+## Don't suggest for nested names like F::foo() and foo::foo().
+# RUN: echo 'call __ZN1F3fooEv; call __ZN3fooC1Ev' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
+# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'
+
+.globl _start, _foo
+_start:
+_foo:
diff --git a/lld/test/MachO/undef-suggest-extern-c2.s b/lld/test/MachO/undef-suggest-extern-c2.s
new file mode 100644
index 0000000000000..152fe3877e2dd
--- /dev/null
+++ b/lld/test/MachO/undef-suggest-extern-c2.s
@@ -0,0 +1,21 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
+
+## The definition is mangled while the reference is not, suggest an arbitrary
+## C++ overload.
+# RUN: echo '.globl __Z3fooi; __Z3fooi:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
+# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s
+
+## Check that we can suggest a local definition.
+# RUN: echo '__Z3fooi: call _foo' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
+# RUN: not %lld %t2.o -demangle -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: undefined symbol: foo
+# CHECK-NEXT: >>> referenced by {{.*}}
+# CHECK-NEXT: >>> did you mean to declare foo(int) as extern "C"?
+
+## Don't suggest nested names whose base name is "foo", e.g. F::foo().
+# RUN: echo '.globl __ZN1F3fooEv; __ZN1F3fooEv:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t3.o
+# RUN: not %lld %t.o %t3.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'
+
+call _foo
More information about the llvm-commits
mailing list