[llvm] 2b27948 - [symbolizer] Support symbol lookup

Serge Pavlov via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 2 07:39:51 PDT 2023


Author: Serge Pavlov
Date: 2023-10-02T21:38:15+07:00
New Revision: 2b27948783e4bbc1132d3220d8517ef62607b558

URL: https://github.com/llvm/llvm-project/commit/2b27948783e4bbc1132d3220d8517ef62607b558
DIFF: https://github.com/llvm/llvm-project/commit/2b27948783e4bbc1132d3220d8517ef62607b558.diff

LOG: [symbolizer] Support symbol lookup

Recent versions of GNU binutils, starting from 2.39, support symbol+offset
lookup in addition to the usual numeric address lookup. This change adds
symbol lookup to llvm-symbolizer and llvm-addr2line.

Now llvm-symbolizer behaves more like GNU addr2line: if the value specified
as an address on the command line or in the input stream is not a number, it
is treated as a symbol name. For example:

    llvm-symbolizer --obj=abc.so func_22
    llvm-symbolizer --obj=abc.so "CODE func_22"

This lookup is currently supported only for functions. Specifying a symbol
together with an offset is not supported yet.

Differential Revision: https://reviews.llvm.org/D149759

Added: 
    llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
    llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
    llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
    llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
    llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
    llvm/test/tools/llvm-symbolizer/Inputs/symbols.so
    llvm/test/tools/llvm-symbolizer/symbol-search.test

Modified: 
    llvm/docs/CommandGuide/llvm-symbolizer.rst
    llvm/docs/ReleaseNotes.rst
    llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
    llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
    llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
    llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
    llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
    llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
    llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
    llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
    llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
    llvm/test/tools/llvm-symbolizer/output-style-json-code.test
    llvm/test/tools/llvm-symbolizer/output-style-json-data.test
    llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
    llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index fe5df077b45664d..59c0ab6d196ace1 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -14,7 +14,7 @@ DESCRIPTION
 :program:`llvm-symbolizer` reads input names and addresses from the command-line
 and prints corresponding source code locations to standard output. It can also
 symbolize logs containing :doc:`Symbolizer Markup </SymbolizerMarkupFormat>` via
-:option:`--filter-markup`.
+:option:`--filter-markup`. Addresses may be specified as numbers or symbol names.
 
 If no address is specified on the command-line, it reads the addresses from
 standard input. If no input name is specified on the command-line, but addresses
@@ -196,6 +196,17 @@ shows --relativenames.
   main
   foo/test.cpp:15:0
 
+Example 7 - Addresses as symbol names:
+
+.. code-block:: console
+
+  $ llvm-symbolizer --obj=test.elf main
+  main
+  /tmp/test.cpp:14:0
+  $ llvm-symbolizer --obj=test.elf "CODE foz"
+  foz
+  /tmp/test.h:1:0
+
 OPTIONS
 -------
 

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 660bb4e70a5a707..8317056ffaf5a56 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -180,6 +180,8 @@ Changes to the LLVM tools
 * ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
   debugging information to print symbols' filenames and line numbers.
 
+* llvm-symbolizer and llvm-addr2line now support addresses specified as symbol names.
+
 Changes to LLDB
 ---------------------------------
 

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index 026f917ced5bc1f..72ffdd29f1b72d5 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -34,6 +34,7 @@ class SourceCode;
 struct Request {
   StringRef ModuleName;
   std::optional<uint64_t> Address;
+  StringRef Symbol;
 };
 
 class DIPrinter {
@@ -46,6 +47,8 @@ class DIPrinter {
   virtual void print(const Request &Request, const DIGlobal &Global) = 0;
   virtual void print(const Request &Request,
                      const std::vector<DILocal> &Locals) = 0;
+  virtual void print(const Request &Request,
+                     const std::vector<DILineInfo> &Locations) = 0;
 
   virtual bool printError(const Request &Request,
                           const ErrorInfoBase &ErrorInfo) = 0;
@@ -91,6 +94,8 @@ class PlainPrinterBase : public DIPrinter {
   void print(const Request &Request, const DIGlobal &Global) override;
   void print(const Request &Request,
              const std::vector<DILocal> &Locals) override;
+  void print(const Request &Request,
+             const std::vector<DILineInfo> &Locations) override;
 
   bool printError(const Request &Request,
                   const ErrorInfoBase &ErrorInfo) override;
@@ -141,6 +146,8 @@ class JSONPrinter : public DIPrinter {
   void print(const Request &Request, const DIGlobal &Global) override;
   void print(const Request &Request,
              const std::vector<DILocal> &Locals) override;
+  void print(const Request &Request,
+             const std::vector<DILineInfo> &Locations) override;
 
   bool printError(const Request &Request,
                   const ErrorInfoBase &ErrorInfo) override;

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
index 51e92b83eadbac0..255932d35cda114 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
@@ -36,6 +36,9 @@ class SymbolizableModule {
   virtual std::vector<DILocal>
   symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;
 
+  virtual std::vector<object::SectionedAddress>
+  findSymbol(StringRef Symbol) const = 0;
+
   // Return true if this is a 32-bit x86 PE COFF module.
   virtual bool isWin32Module() const = 0;
 

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 075dbe3e0e372ed..311fa201d900e4a 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -43,6 +43,8 @@ class SymbolizableObjectFile : public SymbolizableModule {
   DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
   std::vector<DILocal>
   symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
+  std::vector<object::SectionedAddress>
+  findSymbol(StringRef Symbol) const override;
 
   // Return true if this is a 32-bit x86 PE COFF module.
   bool isWin32Module() const override;

diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 99a7f219baaa09f..bc4aa74073a6557 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -104,6 +104,14 @@ class LLVMSymbolizer {
   Expected<std::vector<DILocal>>
   symbolizeFrame(ArrayRef<uint8_t> BuildID,
                  object::SectionedAddress ModuleOffset);
+
+  Expected<std::vector<DILineInfo>> findSymbol(const ObjectFile &Obj,
+                                               StringRef Symbol);
+  Expected<std::vector<DILineInfo>> findSymbol(StringRef ModuleName,
+                                               StringRef Symbol);
+  Expected<std::vector<DILineInfo>> findSymbol(ArrayRef<uint8_t> BuildID,
+                                               StringRef Symbol);
+
   void flush();
 
   // Evict entries from the binary cache until it is under the maximum size
@@ -146,6 +154,9 @@ class LLVMSymbolizer {
   Expected<std::vector<DILocal>>
   symbolizeFrameCommon(const T &ModuleSpecifier,
                        object::SectionedAddress ModuleOffset);
+  template <typename T>
+  Expected<std::vector<DILineInfo>> findSymbolCommon(const T &ModuleSpecifier,
+                                                     StringRef Symbol);
 
   Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);
 

diff  --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index dcf5eee2bb32b61..d7b33ce1d0f062d 100644
--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -260,6 +260,17 @@ void PlainPrinterBase::print(const Request &Request,
   printFooter();
 }
 
+void PlainPrinterBase::print(const Request &Request,
+                             const std::vector<DILineInfo> &Locations) {
+  if (Locations.empty()) {
+    print(Request, DILineInfo());
+  } else {
+    for (const DILineInfo &L : Locations)
+      print(L, false);
+    printFooter();
+  }
+}
+
 bool PlainPrinterBase::printError(const Request &Request,
                                   const ErrorInfoBase &ErrorInfo) {
   ErrHandler(ErrorInfo, Request.ModuleName);
@@ -273,6 +284,8 @@ static std::string toHex(uint64_t V) {
 
 static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") {
   json::Object Json({{"ModuleName", Request.ModuleName.str()}});
+  if (!Request.Symbol.empty())
+    Json["SymName"] = Request.Symbol.str();
   if (Request.Address)
     Json["Address"] = toHex(*Request.Address);
   if (!ErrorMsg.empty())
@@ -362,6 +375,19 @@ void JSONPrinter::print(const Request &Request,
     printJSON(std::move(Json));
 }
 
+void JSONPrinter::print(const Request &Request,
+                        const std::vector<DILineInfo> &Locations) {
+  json::Array Definitions;
+  for (const DILineInfo &L : Locations)
+    Definitions.push_back(toJSON(L));
+  json::Object Json = toJSON(Request);
+  Json["Loc"] = std::move(Definitions);
+  if (ObjectList)
+    ObjectList->push_back(std::move(Json));
+  else
+    printJSON(std::move(Json));
+}
+
 bool JSONPrinter::printError(const Request &Request,
                              const ErrorInfoBase &ErrorInfo) {
   json::Object Json = toJSON(Request, ErrorInfo.message());

diff  --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 6b8068a531c05fa..697303038507a96 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -351,6 +351,19 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
   return DebugInfoContext->getLocalsForAddress(ModuleOffset);
 }
 
+std::vector<object::SectionedAddress>
+SymbolizableObjectFile::findSymbol(StringRef Symbol) const {
+  std::vector<object::SectionedAddress> Result;
+  for (const SymbolDesc &Sym : Symbols) {
+    if (Sym.Name.equals(Symbol)) {
+      object::SectionedAddress A{Sym.Addr,
+                                 getModuleSectionIndexForAddress(Sym.Addr)};
+      Result.push_back(A);
+    }
+  }
+  return Result;
+}
+
 /// Search for the first occurence of specified Address in ObjectFile.
 uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
     uint64_t Address) const {

diff  --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 517f1e7dc284f9c..36d112a5f3fb299 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -231,6 +231,50 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
   return symbolizeFrameCommon(BuildID, ModuleOffset);
 }
 
+template <typename T>
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
+  if (!InfoOrErr)
+    return InfoOrErr.takeError();
+
+  SymbolizableModule *Info = *InfoOrErr;
+  std::vector<DILineInfo> Result;
+
+  // A null module means an error has already been reported. Return an empty
+  // result.
+  if (!Info)
+    return Result;
+
+  for (object::SectionedAddress A : Info->findSymbol(Symbol)) {
+    DILineInfo LineInfo = Info->symbolizeCode(
+        A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
+        Opts.UseSymbolTable);
+    if (LineInfo.FileName != DILineInfo::BadString) {
+      if (Opts.Demangle)
+        LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
+      Result.push_back(LineInfo);
+    }
+  }
+
+  return Result;
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) {
+  return findSymbolCommon(Obj, Symbol);
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol) {
+  return findSymbolCommon(ModuleName.str(), Symbol);
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) {
+  return findSymbolCommon(BuildID, Symbol);
+}
+
 void LLVMSymbolizer::flush() {
   ObjectForUBPathAndArch.clear();
   LRUBinaries.clear();

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
index b5e146b114e254f..b19992175bf9952 100644
--- a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
@@ -1,3 +1,3 @@
-some text
+something not a valid address
 0x40054d
-some text2
+some text possibly a symbol

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp b/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
index a5cfcb2558f3594..2c4d722e3286237 100644
--- a/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
@@ -5,4 +5,4 @@ some text
 0x4005b9
 0x4005ce
 0x4005d4
-some more text
+another text

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
new file mode 100644
index 000000000000000..b097c4d9dc00a73
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
@@ -0,0 +1,19 @@
+// This file is a part of sources used to build `symbols.so`, which is used to
+// test symbol location search made by llvm-symbolizer.
+//
+// Build instructions:
+// $ mkdir /tmp/dbginfo
+// $ cp symbols.h symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c /tmp/dbginfo/
+// $ cd /tmp/dbginfo
+// $ gcc -osymbols.so -shared -fPIC -g symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c
+
+
+extern "C" {
+extern int global_01;
+int func_01();
+int func_02(int);
+}
+
+template<typename T> T func_03(T x) {
+  return x + T(1);
+}

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
new file mode 100644
index 000000000000000..ad4b3e34411aa40
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
@@ -0,0 +1,25 @@
+#include "symbols.h"
+
+int global_01 = 22;
+
+int static static_var = 0;
+
+static int static_func_01(int x) {
+  static_var = x;
+  return global_01;
+}
+
+int func_01() {
+  int res = 1;
+  return res + static_func_01(22);
+}
+
+int func_04() {
+  static_var = 0;
+  return 22;
+}
+
+int func_04(int x) {
+  int res = static_var;
+  return res + func_03(x);
+}

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
new file mode 100644
index 000000000000000..35e66d62622f89c
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
@@ -0,0 +1,18 @@
+#include "symbols.h"
+
+int static static_var = 4;
+
+static int static_func_01(int x) {
+  static_var--;
+  return x;
+}
+
+int func_02(int x) {
+  static_var = x;
+  return static_func_01(x);
+}
+
+int func_05(int x) {
+  int res = static_var;
+  return res + func_03(x);
+}

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
new file mode 100644
index 000000000000000..1284be505b6bac4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
@@ -0,0 +1,12 @@
+static int static_func(int);
+static int static_var = 0;
+
+int static_func(int x) {
+  static_var++;
+  return static_var + x;
+}
+
+int func_06(int x) {
+  return static_func(x);
+}
+

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
new file mode 100644
index 000000000000000..de2ac81d2a78cc4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
@@ -0,0 +1,13 @@
+static int static_func(int);
+static int static_var = 5;
+
+int static_func(int x) {
+  static_var++;
+  return static_var + x;
+}
+
+int func_07(int x) {
+  static_var++;
+  return static_func(x);
+}
+

diff  --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so
new file mode 100755
index 000000000000000..ceacd9845a8d880
Binary files /dev/null and b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so differ

diff  --git a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test
index 9179b673f39fd00..0e0e61c0bf119a3 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test
+++ b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test
@@ -25,39 +25,44 @@
 # RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
 # RUN:   FileCheck %s --check-prefix=NO-INLINES --strict-whitespace --match-full-lines --implicit-check-not={{.}}
 ## Invalid first argument before any valid one.
-# NO-INLINES:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-INLINES:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
 ## Resolve valid address.
 # NO-INLINES-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2}]}
 ## Invalid argument after a valid one.
-# NO-INLINES-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-INLINES-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
 
 ## This test case is testing stdin input, inlines by default.
 # RUN: llvm-symbolizer --output-style=JSON -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
 # RUN:   FileCheck %s --check-prefix=INLINE --strict-whitespace --match-full-lines --implicit-check-not={{.}}
 ## Invalid first argument before any valid one.
-# INLINE:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
 ## Resolve valid address.
 # INLINE-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
 ## Invalid argument after a valid one.
-# INLINE-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
 
 ## Also check the last test case with llvm-adr2line.
 ## The expected result is the same with -f -i.
 # RUN: llvm-addr2line --output-style=JSON -f -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
 # RUN:   FileCheck %s --check-prefix=INLINE-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}}
 ## Invalid first argument before any valid one.
-# INLINE-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
 ## Resolve valid address.
 # INLINE-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
 ## Invalid argument after a valid one.
-# INLINE-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
 
 ## Note llvm-addr2line without -f does not print the function name in JSON too.
 # RUN: llvm-addr2line --output-style=JSON -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
 # RUN:   FileCheck %s --check-prefix=NO-FUNC-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}}
 ## Invalid first argument before any valid one.
-# NO-FUNC-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-FUNC-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
 ## Resolve valid address.
 # NO-FUNC-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
 ## Invalid argument after a valid one.
-# NO-FUNC-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-FUNC-A2L-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
+
+## When a module offset is specified by a symbol, more than one source location can be found.
+# RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/symbols.so "static_func" | \
+# RUN:   FileCheck %s --check-prefix=MULTIPLE --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+# MULTIPLE:[{"Loc":[{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","FunctionName":"static_func","Line":4,"StartAddress":"0x121d","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","StartLine":4},{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","FunctionName":"static_func","Line":4,"StartAddress":"0x125f","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","StartLine":4}],"ModuleName":"{{.*}}Inputs/symbols.so","SymName":"static_func"}]

diff  --git a/llvm/test/tools/llvm-symbolizer/output-style-json-data.test b/llvm/test/tools/llvm-symbolizer/output-style-json-data.test
index 722ac73d751043b..b91555937086ef1 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-json-data.test
+++ b/llvm/test/tools/llvm-symbolizer/output-style-json-data.test
@@ -9,8 +9,8 @@
 
 ## Handle invalid argument.
 # RUN: llvm-symbolizer "DATA tmp.o Z" --output-style=JSON | \
-# RUN:   FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
-# INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}]
+# RUN:   FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+# INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}]
 
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 

diff  --git a/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll b/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
index 8a99345420fdeba..f82d6704ef8f915 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
+++ b/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
@@ -9,8 +9,8 @@
 
 ;; Handle invalid argument.
 ; RUN: llvm-symbolizer "FRAME tmp.o Z" --output-style=JSON | \
-; RUN:   FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
-; INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}]
+; RUN:   FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+; INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}]
 
 ; RUN: llc -filetype=obj -o %t.o %s 
 

diff  --git a/llvm/test/tools/llvm-symbolizer/symbol-search.test b/llvm/test/tools/llvm-symbolizer/symbol-search.test
new file mode 100644
index 000000000000000..634229c2e74c00c
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/symbol-search.test
@@ -0,0 +1,65 @@
+# This test checks the case when an address is specified by a symbol name rather
+# than a number.
+#
+# It uses ELF shared object `Inputs/symbols.so` built for x86_64 using
+# the instructions from `Inputs/symbols.h`.
+
+# Show that the "CODE" command supports search by symbol name.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s
+CODE-CMD: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+
+# Check if a symbol name can be specified on the command-line.
+RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s
+RUN: llvm-symbolizer -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s
+SYMB: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+
+# Check that if a symbol has a space in its name, ignore everything from the space onwards.
+RUN: llvm-addr2line -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s
+RUN: llvm-symbolizer -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s
+
+# Show that a symbol name may be resolved to more than one location.
+RUN: llvm-addr2line -e %p/Inputs/symbols.so static_func | FileCheck --check-prefix=SYMB-MULTI %s
+SYMB-MULTI:      /tmp/dbginfo{{[/\]+}}symbols.part3.c:4
+SYMB-MULTI-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part4.c:4
+
+# Show that if a symbol is not found, a special mark is printed.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s
+NONEXISTENT: ??
+
+# Show that more than one symbol may be specified.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s
+FUNCS:  /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+FUNCS:  /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:10
+
+# Show that C++ mangled names may be specified.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
+MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:7
+MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:5
+
+# Show that containing function name can be printed in mangled form.
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so --no-demangle _Z7func_04i | FileCheck --check-prefix=MANGLED %s
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f _Z7func_04i | FileCheck --check-prefix=MANGLED %s
+MANGLED: _Z7func_04i
+MANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22
+
+# Show that containing function name can be printed in demangled form.
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f --demangle _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s
+NOTMANGLED: func_04(int)
+NOTMANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22
+
+# Show that both the symbol and input file can be specified in the search string on the command-line.
+RUN: llvm-addr2line "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s
+RUN: llvm-symbolizer "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s
+SYMBIN: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+
+# Show that the case of missing input file specified in the search string on the command-line is properly treated.
+RUN: llvm-addr2line "%p/Inputs/666.so func_01" 2> %t.1.stderr | FileCheck --check-prefix=NONEXISTENT %s
+RUN: FileCheck --input-file=%t.1.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s
+RUN: llvm-symbolizer "%p/Inputs/666.so func_01" 2> %t.2.stderr | FileCheck --check-prefix=NONEXISTENT %s
+RUN: FileCheck --input-file=%t.2.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s
+BINARY-NOT-FOUND: error: '{{.*}}666.so': [[MSG]]

diff  --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 78a0e6772f3fb36..447c18abadc1743 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -159,7 +159,7 @@ static Error makeStringError(StringRef Msg) {
 static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
                           StringRef InputString, Command &Cmd,
                           std::string &ModuleName, object::BuildID &BuildID,
-                          uint64_t &ModuleOffset) {
+                          StringRef &Symbol, uint64_t &ModuleOffset) {
   ModuleName = BinaryName;
   if (InputString.consume_front("CODE ")) {
     Cmd = Command::Code;
@@ -224,35 +224,41 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
       return makeStringError("no input filename has been specified");
   }
 
-  // Parse module offset.
+  // Parse module offset, which can be specified as a number or as a symbol.
   InputString = InputString.ltrim();
   if (InputString.empty())
     return makeStringError("no module offset has been specified");
+
+  // If input string contains a space, ignore everything after it. This behavior
+  // is consistent with GNU addr2line.
   int OffsetLength = InputString.find_first_of(" \n\r");
   StringRef Offset = InputString.substr(0, OffsetLength);
+
   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
   // "0x" or "0X" prefix; do the same for compatibility.
   if (IsAddr2Line)
     Offset.consume_front("0x") || Offset.consume_front("0X");
 
-  // If the input is not a valid module offset, it is not an error, but its
-  // lookup does not make sense. Return error of different kind to distinguish
-  // from error or success.
-  if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset))
-    return errorCodeToError(errc::invalid_argument);
+  // If the input is not a number, treat it as a symbol.
+  if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) {
+    Symbol = Offset;
+    ModuleOffset = 0;
+  }
 
   return Error::success();
 }
 
 template <typename T>
 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
-                    uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
-                    OutputStyle Style, LLVMSymbolizer &Symbolizer,
-                    DIPrinter &Printer) {
+                    StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
+                    bool ShouldInline, OutputStyle Style,
+                    LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
   uint64_t AdjustedOffset = Offset - AdjustVMA;
   object::SectionedAddress Address = {AdjustedOffset,
                                       object::SectionedAddress::UndefSection};
-  Request SymRequest = {ModuleName, Offset};
+  Request SymRequest = {
+      ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
+      Symbol};
   if (Cmd == Command::Data) {
     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
     print(SymRequest, ResOrErr, Printer);
@@ -260,6 +266,10 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
     Expected<std::vector<DILocal>> ResOrErr =
         Symbolizer.symbolizeFrame(ModuleSpec, Address);
     print(SymRequest, ResOrErr, Printer);
+  } else if (!Symbol.empty()) {
+    Expected<std::vector<DILineInfo>> ResOrErr =
+        Symbolizer.findSymbol(ModuleSpec, Symbol);
+    print(SymRequest, ResOrErr, Printer);
   } else if (ShouldInline) {
     Expected<DIInliningInfo> ResOrErr =
         Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
@@ -288,7 +298,7 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
 }
 
 static void printUnknownLineInfo(std::string ModuleName, DIPrinter &Printer) {
-  Request SymRequest = {ModuleName, std::nullopt};
+  Request SymRequest = {ModuleName, std::nullopt, StringRef()};
   Printer.print(SymRequest, DILineInfo());
 }
 
@@ -301,16 +311,14 @@ static void symbolizeInput(const opt::InputArgList &Args,
   std::string ModuleName;
   object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
   uint64_t Offset = 0;
+  StringRef Symbol;
   if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
                              StringRef(InputString), Cmd, ModuleName, BuildID,
-                             Offset)) {
-    handleAllErrors(
-        std::move(E),
-        [&](const StringError &EI) {
-          printError(EI, InputString);
-          printUnknownLineInfo(ModuleName, Printer);
-        },
-        [&](const ECError &EI) { printUnknownLineInfo(ModuleName, Printer); });
+                             Symbol, Offset)) {
+    handleAllErrors(std::move(E), [&](const StringError &EI) {
+      printError(EI, InputString);
+      printUnknownLineInfo(ModuleName, Printer);
+    });
     return;
   }
   bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
@@ -319,11 +327,11 @@ static void symbolizeInput(const opt::InputArgList &Args,
     if (!Args.hasArg(OPT_no_debuginfod))
       enableDebuginfod(Symbolizer, Args);
     std::string BuildIDStr = toHex(BuildID);
-    executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
-                   Style, Symbolizer, Printer);
+    executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
+                   ShouldInline, Style, Symbolizer, Printer);
   } else {
-    executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
-                   Style, Symbolizer, Printer);
+    executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
+                   ShouldInline, Style, Symbolizer, Printer);
   }
 }
 
@@ -527,7 +535,7 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {
   if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {
     auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());
     if (!Status) {
-      Request SymRequest = {Arg->getValue(), 0};
+      Request SymRequest = {Arg->getValue(), 0, StringRef()};
       handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {
         Printer->printError(SymRequest, EI);
       });

diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 5984be98d798a86..682f79a540cdc5c 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -20,6 +20,7 @@ using ::llvm::DIInliningInfo;
 using ::llvm::DILineInfo;
 using ::llvm::DILineInfoSpecifier;
 using ::llvm::DILocal;
+using ::llvm::StringRef;
 using ::llvm::memprof::CallStackMap;
 using ::llvm::memprof::Frame;
 using ::llvm::memprof::FrameId;
@@ -53,6 +54,9 @@ class MockSymbolizer : public SymbolizableModule {
   virtual std::vector<DILocal> symbolizeFrame(SectionedAddress) const {
     llvm_unreachable("unused");
   }
+  virtual std::vector<SectionedAddress> findSymbol(StringRef Symbol) const {
+    llvm_unreachable("unused");
+  }
   virtual bool isWin32Module() const { llvm_unreachable("unused"); }
   virtual uint64_t getModulePreferredBase() const {
     llvm_unreachable("unused");


        


More information about the llvm-commits mailing list