[llvm-branch-commits] [lld] 764e282 - [LLD][COFF] Skip computation of the undefined symbols references that are not shown

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jul 22 04:13:32 PDT 2020


Author: Sylvain Audi
Date: 2020-07-22T13:12:04+02:00
New Revision: 764e28231e4b3b04748ac1c85576402bb76919de

URL: https://github.com/llvm/llvm-project/commit/764e28231e4b3b04748ac1c85576402bb76919de
DIFF: https://github.com/llvm/llvm-project/commit/764e28231e4b3b04748ac1c85576402bb76919de.diff

LOG: [LLD][COFF] Skip computation of the undefined symbols references that are not shown

The "undefined symbol" error message from lld-link displays up to 3 references to that symbol, and the number of extra references not shown.

This patch removes the computation of the strings for those extra references.

It fixes a freeze of lld-link we accidentally encountered when activating asan on a large project, without linking with the asan library.
In that case, __asan_report_load8 was referenced more than 2 million times, causing the computation of that many display strings, of which only 3 were used.

Differential Revision: https://reviews.llvm.org/D83510

(cherry picked from commit 3a108ab256dba7b5a7304f0e83818673d334405f)

Added: 
    lld/test/COFF/Inputs/undefined-symbol-multi-lto.ll

Modified: 
    lld/COFF/SymbolTable.cpp
    lld/test/COFF/undefined-symbol-multi.s

Removed: 
    


################################################################################
diff  --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index d4d2a159a639..173e32f628ef 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -136,12 +136,16 @@ getFileLine(const SectionChunk *c, uint32_t addr) {
 // of all references to that symbol from that file. If no debug information is
 // available, returns just the name of the file, else one string per actual
 // reference as described in the debug info.
-std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
+// Returns up to maxStrings string descriptions, along with the total number of
+// locations found.
+static std::pair<std::vector<std::string>, size_t>
+getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
   struct Location {
     Symbol *sym;
     std::pair<StringRef, uint32_t> fileLine;
   };
   std::vector<Location> locations;
+  size_t numLocations = 0;
 
   for (Chunk *c : file->getChunks()) {
     auto *sc = dyn_cast<SectionChunk>(c);
@@ -150,6 +154,10 @@ std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
     for (const coff_relocation &r : sc->getRelocs()) {
       if (r.SymbolTableIndex != symIndex)
         continue;
+      numLocations++;
+      if (locations.size() >= maxStrings)
+        continue;
+
       Optional<std::pair<StringRef, uint32_t>> fileLine =
           getFileLine(sc, r.VirtualAddress);
       Symbol *sym = getSymbol(sc, r.VirtualAddress);
@@ -160,8 +168,12 @@ std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
     }
   }
 
-  if (locations.empty())
-    return std::vector<std::string>({"\n>>> referenced by " + toString(file)});
+  if (maxStrings == 0)
+    return std::make_pair(std::vector<std::string>(), numLocations);
+
+  if (numLocations == 0)
+    return std::make_pair(
+        std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
 
   std::vector<std::string> symbolLocations(locations.size());
   size_t i = 0;
@@ -175,17 +187,26 @@ std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
     if (loc.sym)
       os << ":(" << toString(*loc.sym) << ')';
   }
-  return symbolLocations;
+  return std::make_pair(symbolLocations, numLocations);
+}
+
+std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
+  return getSymbolLocations(file, symIndex, SIZE_MAX).first;
 }
 
-std::vector<std::string> getSymbolLocations(InputFile *file,
-                                            uint32_t symIndex) {
+static std::pair<std::vector<std::string>, size_t>
+getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
   if (auto *o = dyn_cast<ObjFile>(file))
-    return getSymbolLocations(o, symIndex);
-  if (auto *b = dyn_cast<BitcodeFile>(file))
-    return getSymbolLocations(b);
+    return getSymbolLocations(o, symIndex, maxStrings);
+  if (auto *b = dyn_cast<BitcodeFile>(file)) {
+    std::vector<std::string> symbolLocations = getSymbolLocations(b);
+    size_t numLocations = symbolLocations.size();
+    if (symbolLocations.size() > maxStrings)
+      symbolLocations.resize(maxStrings);
+    return std::make_pair(symbolLocations, numLocations);
+  }
   llvm_unreachable("unsupported file type passed to getSymbolLocations");
-  return {};
+  return std::make_pair(std::vector<std::string>(), (size_t)0);
 }
 
 // For an undefined symbol, stores all files referencing it and the index of
@@ -205,20 +226,21 @@ static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
   os << "undefined symbol: " << toString(*undefDiag.sym);
 
   const size_t maxUndefReferences = 3;
-  size_t i = 0, numRefs = 0;
+  size_t numDisplayedRefs = 0, numRefs = 0;
   for (const UndefinedDiag::File &ref : undefDiag.files) {
-    std::vector<std::string> symbolLocations =
-        getSymbolLocations(ref.file, ref.symIndex);
-    numRefs += symbolLocations.size();
+    std::vector<std::string> symbolLocations;
+    size_t totalLocations = 0;
+    std::tie(symbolLocations, totalLocations) = getSymbolLocations(
+        ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
+
+    numRefs += totalLocations;
+    numDisplayedRefs += symbolLocations.size();
     for (const std::string &s : symbolLocations) {
-      if (i >= maxUndefReferences)
-        break;
       os << s;
-      i++;
     }
   }
-  if (i < numRefs)
-    os << "\n>>> referenced " << numRefs - i << " more times";
+  if (numDisplayedRefs < numRefs)
+    os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
   errorOrWarn(os.str());
 }
 

diff  --git a/lld/test/COFF/Inputs/undefined-symbol-multi-lto.ll b/lld/test/COFF/Inputs/undefined-symbol-multi-lto.ll
new file mode 100644
index 000000000000..5f6730272e61
--- /dev/null
+++ b/lld/test/COFF/Inputs/undefined-symbol-multi-lto.ll
@@ -0,0 +1,23 @@
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-windows-msvc"
+
+define dso_local i32 @"?baz@@YAHXZ"() #0 {
+  %1 = call i32 @"?foo@@YAHXZ"()
+  %2 = call i32 @"?foo@@YAHXZ"()
+  %3 = call i32 @"?bar@@YAHXZ"()
+  %4 = call i32 @"?bar@@YAHXZ"()
+  ret i32 0
+}
+
+declare dso_local i32 @"?foo@@YAHXZ"() #1
+
+declare dso_local i32 @"?bar@@YAHXZ"() #1
+
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git bed3e1a99b41f5a9525bc0edf12ecbcf63aab0cf)"}

diff  --git a/lld/test/COFF/undefined-symbol-multi.s b/lld/test/COFF/undefined-symbol-multi.s
index 2d3d8285338c..086d12828b2d 100644
--- a/lld/test/COFF/undefined-symbol-multi.s
+++ b/lld/test/COFF/undefined-symbol-multi.s
@@ -16,17 +16,20 @@
 # RUN: echo '  call "?foo@@YAHXZ"' >> %t.moreref.s
 # RUN: echo '  call "?foo@@YAHXZ"' >> %t.moreref.s
 # RUN: llvm-mc -triple=x86_64-windows-msvc -filetype=obj -o %t2.obj %t.moreref.s
-# RUN: not lld-link /out:/dev/null  %t.obj %t2.obj 2>&1 | FileCheck %s
+# RUN: llvm-as %S/Inputs/undefined-symbol-multi-lto.ll -o %t3.obj
+# RUN: not lld-link /out:/dev/null  %t.obj %t2.obj %t3.obj 2>&1 | FileCheck %s
 
 # CHECK: error: undefined symbol: int __cdecl foo(void)
 # CHECK-NEXT: >>> referenced by {{.*}}tmp.obj:(main)
 # CHECK-NEXT: >>> referenced by {{.*}}tmp.obj:(main)
 # CHECK-NEXT: >>> referenced by {{.*}}tmp2.obj:(bar)
-# CHECK-NEXT: >>> referenced 9 more times
+# CHECK-NEXT: >>> referenced 10 more times
 # CHECK-EMPTY:
 # CHECK-NEXT: error: undefined symbol: int __cdecl bar(void)
 # CHECK-NEXT: >>> referenced by {{.*}}.obj:(main)
 # CHECK-NEXT: >>> referenced by {{.*}}.obj:(f1)
+# CHECK-NEXT: >>> referenced by {{.*}}undefined-symbol-multi-lto.ll
+# CHECK-NEXT: >>>               {{.*}}tmp3.obj
 
         .section        .text,"xr",one_only,main
 .globl main


        


More information about the llvm-branch-commits mailing list