[llvm] e144ae5 - [symbolizer] Support symbol lookup
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 00:42:27 PDT 2023
Author: Serge Pavlov
Date: 2023-11-01T14:41:39+07:00
New Revision: e144ae54dcb96838a6176fd9eef21028935ccd4f
URL: https://github.com/llvm/llvm-project/commit/e144ae54dcb96838a6176fd9eef21028935ccd4f
DIFF: https://github.com/llvm/llvm-project/commit/e144ae54dcb96838a6176fd9eef21028935ccd4f.diff
LOG: [symbolizer] Support symbol lookup
Recent versions of GNU binutils starting from 2.39 support symbol+offset
lookup in addition to the usual numeric address lookup. This change adds
symbol lookup to llvm-symbolizer and llvm-addr2line.
Now llvm-symbolizer behaves more like GNU addr2line: if the value specified
as an address on the command line or in the input stream is not a number, it is
treated as a symbol name. For example:
llvm-symbolizer --obj=abc.so func_22
llvm-symbolizer --obj=abc.so "CODE func_22"
This lookup is currently supported only for functions. Specifying a symbol
with an offset is not supported yet.
This is a recommit of 2b27948783e4bbc1132d3220d8517ef62607b558, reverted
in 39fec5457c0925bd39f67f63fe17391584e08258 because the test
llvm/test/Support/interrupts.test started failing on Windows. The test was
changed in 18f036d0105589c3175bb51a518c5d272dae61e2 and is also updated in
this commit.
Differential Revision: https://reviews.llvm.org/D149759
Added:
llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
llvm/test/tools/llvm-symbolizer/Inputs/symbols.so
llvm/test/tools/llvm-symbolizer/symbol-search.test
Modified:
llvm/docs/CommandGuide/llvm-symbolizer.rst
llvm/docs/ReleaseNotes.rst
llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
llvm/test/Support/interrupts.test
llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
llvm/test/tools/llvm-symbolizer/output-style-json-code.test
llvm/test/tools/llvm-symbolizer/output-style-json-data.test
llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
llvm/unittests/ProfileData/MemProfTest.cpp
Removed:
################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index fe5df077b45664d..59c0ab6d196ace1 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -14,7 +14,7 @@ DESCRIPTION
:program:`llvm-symbolizer` reads input names and addresses from the command-line
and prints corresponding source code locations to standard output. It can also
symbolize logs containing :doc:`Symbolizer Markup </SymbolizerMarkupFormat>` via
-:option:`--filter-markup`.
+:option:`--filter-markup`. Addresses may be specified as numbers or symbol names.
If no address is specified on the command-line, it reads the addresses from
standard input. If no input name is specified on the command-line, but addresses
@@ -196,6 +196,17 @@ shows --relativenames.
main
foo/test.cpp:15:0
+Example 7 - Addresses as symbol names:
+
+.. code-block:: console
+
+ $ llvm-symbolizer --obj=test.elf main
+ main
+ /tmp/test.cpp:14:0
+ $ llvm-symbolizer --obj=test.elf "CODE foz"
+ foz
+ /tmp/test.h:1:0
+
OPTIONS
-------
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a1277171e8a54ec..25817e6e1d7f413 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -201,6 +201,8 @@ Changes to the LLVM tools
* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
debugging information to print symbols' filenames and line numbers.
+* llvm-symbolizer and llvm-addr2line now support addresses specified as symbol names.
+
Changes to LLDB
---------------------------------
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index 026f917ced5bc1f..72ffdd29f1b72d5 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -34,6 +34,7 @@ class SourceCode;
struct Request {
StringRef ModuleName;
std::optional<uint64_t> Address;
+ StringRef Symbol;
};
class DIPrinter {
@@ -46,6 +47,8 @@ class DIPrinter {
virtual void print(const Request &Request, const DIGlobal &Global) = 0;
virtual void print(const Request &Request,
const std::vector<DILocal> &Locals) = 0;
+ virtual void print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) = 0;
virtual bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) = 0;
@@ -91,6 +94,8 @@ class PlainPrinterBase : public DIPrinter {
void print(const Request &Request, const DIGlobal &Global) override;
void print(const Request &Request,
const std::vector<DILocal> &Locals) override;
+ void print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) override;
bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) override;
@@ -141,6 +146,8 @@ class JSONPrinter : public DIPrinter {
void print(const Request &Request, const DIGlobal &Global) override;
void print(const Request &Request,
const std::vector<DILocal> &Locals) override;
+ void print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) override;
bool printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) override;
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
index 51e92b83eadbac0..255932d35cda114 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
@@ -36,6 +36,9 @@ class SymbolizableModule {
virtual std::vector<DILocal>
symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;
+ virtual std::vector<object::SectionedAddress>
+ findSymbol(StringRef Symbol) const = 0;
+
// Return true if this is a 32-bit x86 PE COFF module.
virtual bool isWin32Module() const = 0;
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 075dbe3e0e372ed..311fa201d900e4a 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -43,6 +43,8 @@ class SymbolizableObjectFile : public SymbolizableModule {
DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
std::vector<DILocal>
symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
+ std::vector<object::SectionedAddress>
+ findSymbol(StringRef Symbol) const override;
// Return true if this is a 32-bit x86 PE COFF module.
bool isWin32Module() const override;
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 99a7f219baaa09f..bc4aa74073a6557 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -104,6 +104,14 @@ class LLVMSymbolizer {
Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);
+
+ Expected<std::vector<DILineInfo>> findSymbol(const ObjectFile &Obj,
+ StringRef Symbol);
+ Expected<std::vector<DILineInfo>> findSymbol(StringRef ModuleName,
+ StringRef Symbol);
+ Expected<std::vector<DILineInfo>> findSymbol(ArrayRef<uint8_t> BuildID,
+ StringRef Symbol);
+
void flush();
// Evict entries from the binary cache until it is under the maximum size
@@ -146,6 +154,9 @@ class LLVMSymbolizer {
Expected<std::vector<DILocal>>
symbolizeFrameCommon(const T &ModuleSpecifier,
object::SectionedAddress ModuleOffset);
+ template <typename T>
+ Expected<std::vector<DILineInfo>> findSymbolCommon(const T &ModuleSpecifier,
+ StringRef Symbol);
Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);
diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index dcf5eee2bb32b61..d7b33ce1d0f062d 100644
--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -260,6 +260,17 @@ void PlainPrinterBase::print(const Request &Request,
printFooter();
}
+void PlainPrinterBase::print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) {
+ if (Locations.empty()) {
+ print(Request, DILineInfo());
+ } else {
+ for (const DILineInfo &L : Locations)
+ print(L, false);
+ printFooter();
+ }
+}
+
bool PlainPrinterBase::printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) {
ErrHandler(ErrorInfo, Request.ModuleName);
@@ -273,6 +284,8 @@ static std::string toHex(uint64_t V) {
static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") {
json::Object Json({{"ModuleName", Request.ModuleName.str()}});
+ if (!Request.Symbol.empty())
+ Json["SymName"] = Request.Symbol.str();
if (Request.Address)
Json["Address"] = toHex(*Request.Address);
if (!ErrorMsg.empty())
@@ -362,6 +375,19 @@ void JSONPrinter::print(const Request &Request,
printJSON(std::move(Json));
}
+void JSONPrinter::print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) {
+ json::Array Definitions;
+ for (const DILineInfo &L : Locations)
+ Definitions.push_back(toJSON(L));
+ json::Object Json = toJSON(Request);
+ Json["Loc"] = std::move(Definitions);
+ if (ObjectList)
+ ObjectList->push_back(std::move(Json));
+ else
+ printJSON(std::move(Json));
+}
+
bool JSONPrinter::printError(const Request &Request,
const ErrorInfoBase &ErrorInfo) {
json::Object Json = toJSON(Request, ErrorInfo.message());
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 6b8068a531c05fa..697303038507a96 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -351,6 +351,19 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
return DebugInfoContext->getLocalsForAddress(ModuleOffset);
}
+std::vector<object::SectionedAddress>
+SymbolizableObjectFile::findSymbol(StringRef Symbol) const {
+ std::vector<object::SectionedAddress> Result;
+ for (const SymbolDesc &Sym : Symbols) {
+ if (Sym.Name.equals(Symbol)) {
+ object::SectionedAddress A{Sym.Addr,
+ getModuleSectionIndexForAddress(Sym.Addr)};
+ Result.push_back(A);
+ }
+ }
+ return Result;
+}
+
/// Search for the first occurence of specified Address in ObjectFile.
uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
uint64_t Address) const {
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 517f1e7dc284f9c..36d112a5f3fb299 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -231,6 +231,50 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
return symbolizeFrameCommon(BuildID, ModuleOffset);
}
+template <typename T>
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
+ auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
+ if (!InfoOrErr)
+ return InfoOrErr.takeError();
+
+ SymbolizableModule *Info = *InfoOrErr;
+ std::vector<DILineInfo> Result;
+
+ // A null module means an error has already been reported. Return an empty
+ // result.
+ if (!Info)
+ return Result;
+
+ for (object::SectionedAddress A : Info->findSymbol(Symbol)) {
+ DILineInfo LineInfo = Info->symbolizeCode(
+ A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
+ Opts.UseSymbolTable);
+ if (LineInfo.FileName != DILineInfo::BadString) {
+ if (Opts.Demangle)
+ LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
+ Result.push_back(LineInfo);
+ }
+ }
+
+ return Result;
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) {
+ return findSymbolCommon(Obj, Symbol);
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol) {
+ return findSymbolCommon(ModuleName.str(), Symbol);
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) {
+ return findSymbolCommon(BuildID, Symbol);
+}
+
void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
LRUBinaries.clear();
diff --git a/llvm/test/Support/interrupts.test b/llvm/test/Support/interrupts.test
index 752426c5292b098..4768ac61dff0260 100644
--- a/llvm/test/Support/interrupts.test
+++ b/llvm/test/Support/interrupts.test
@@ -1,7 +1,10 @@
## Show that SIGINT and similar signals don't cause crash messages to be
## reported.
# RUN: %python %s wrapper llvm-symbolizer 2> %t.err
-# RUN: count 0 < %t.err
+# RUN: FileCheck --input-file=%t.err %s
+
+# CHECK: {{.*}} error: 'foo': {{[Nn]}}o such file or directory
+# CHECK-NOT: {{.+}}
import os
import signal
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
index b5e146b114e254f..b19992175bf9952 100644
--- a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp
@@ -1,3 +1,3 @@
-some text
+something not a valid address
0x40054d
-some text2
+some text possibly a symbol
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp b/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
index a5cfcb2558f3594..2c4d722e3286237 100644
--- a/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/discrim.inp
@@ -5,4 +5,4 @@ some text
0x4005b9
0x4005ce
0x4005d4
-some more text
+another text
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
new file mode 100644
index 000000000000000..b097c4d9dc00a73
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.h
@@ -0,0 +1,19 @@
+// This file is a part of sources used to build `symbols.so`, which is used to
+// test symbol location search made by llvm-symbolizer.
+//
+// Build instructions:
+// $ mkdir /tmp/dbginfo
+// $ cp symbols.h symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c /tmp/dbginfo/
+// $ cd /tmp/dbginfo
+// $ gcc -osymbols.so -shared -fPIC -g symbols.part1.cpp symbols.part2.cpp symbols.part3.c symbols.part4.c
+
+
+extern "C" {
+extern int global_01;
+int func_01();
+int func_02(int);
+}
+
+template<typename T> T func_03(T x) {
+ return x + T(1);
+}
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
new file mode 100644
index 000000000000000..ad4b3e34411aa40
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part1.cpp
@@ -0,0 +1,25 @@
+#include "symbols.h"
+
+int global_01 = 22;
+
+int static static_var = 0;
+
+static int static_func_01(int x) {
+ static_var = x;
+ return global_01;
+}
+
+int func_01() {
+ int res = 1;
+ return res + static_func_01(22);
+}
+
+int func_04() {
+ static_var = 0;
+ return 22;
+}
+
+int func_04(int x) {
+ int res = static_var;
+ return res + func_03(x);
+}
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
new file mode 100644
index 000000000000000..35e66d62622f89c
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part2.cpp
@@ -0,0 +1,18 @@
+#include "symbols.h"
+
+int static static_var = 4;
+
+static int static_func_01(int x) {
+ static_var--;
+ return x;
+}
+
+int func_02(int x) {
+ static_var = x;
+ return static_func_01(x);
+}
+
+int func_05(int x) {
+ int res = static_var;
+ return res + func_03(x);
+}
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
new file mode 100644
index 000000000000000..1284be505b6bac4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part3.c
@@ -0,0 +1,12 @@
+static int static_func(int);
+static int static_var = 0;
+
+int static_func(int x) {
+ static_var++;
+ return static_var + x;
+}
+
+int func_06(int x) {
+ return static_func(x);
+}
+
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
new file mode 100644
index 000000000000000..de2ac81d2a78cc4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.part4.c
@@ -0,0 +1,13 @@
+static int static_func(int);
+static int static_var = 5;
+
+int static_func(int x) {
+ static_var++;
+ return static_var + x;
+}
+
+int func_07(int x) {
+ static_var++;
+ return static_func(x);
+}
+
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so
new file mode 100755
index 000000000000000..ceacd9845a8d880
Binary files /dev/null and b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so differ
diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test
index 9179b673f39fd00..0e0e61c0bf119a3 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test
+++ b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test
@@ -25,39 +25,44 @@
# RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=NO-INLINES --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
-# NO-INLINES:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-INLINES:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# NO-INLINES-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2}]}
## Invalid argument after a valid one.
-# NO-INLINES-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-INLINES-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
## This test case is testing stdin input, inlines by default.
# RUN: llvm-symbolizer --output-style=JSON -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=INLINE --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
-# INLINE:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# INLINE-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
## Invalid argument after a valid one.
-# INLINE-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
## Also check the last test case with llvm-adr2line.
## The expected result is the same with -f -i.
# RUN: llvm-addr2line --output-style=JSON -f -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=INLINE-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
-# INLINE-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# INLINE-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
## Invalid argument after a valid one.
-# INLINE-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# INLINE-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
## Note llvm-addr2line without -f does not print the function name in JSON too.
# RUN: llvm-addr2line --output-style=JSON -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \
# RUN: FileCheck %s --check-prefix=NO-FUNC-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}}
## Invalid first argument before any valid one.
-# NO-FUNC-A2L:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-FUNC-A2L:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"}
## Resolve valid address.
# NO-FUNC-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]}
## Invalid argument after a valid one.
-# NO-FUNC-A2L-NEXT:{"ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}
+# NO-FUNC-A2L-NEXT:{"Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"}
+
+## When a module offset is specified by a symbol, more than one source location can be found.
+# RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/symbols.so "static_func" | \
+# RUN: FileCheck %s --check-prefix=MULTIPLE --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+# MULTIPLE:[{"Loc":[{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","FunctionName":"static_func","Line":4,"StartAddress":"0x121d","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part3.c","StartLine":4},{"Column":24,"Discriminator":0,"FileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","FunctionName":"static_func","Line":4,"StartAddress":"0x125f","StartFileName":"/tmp/dbginfo{{/|\\\\}}symbols.part4.c","StartLine":4}],"ModuleName":"{{.*}}Inputs/symbols.so","SymName":"static_func"}]
diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-data.test b/llvm/test/tools/llvm-symbolizer/output-style-json-data.test
index 722ac73d751043b..b91555937086ef1 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-json-data.test
+++ b/llvm/test/tools/llvm-symbolizer/output-style-json-data.test
@@ -9,8 +9,8 @@
## Handle invalid argument.
# RUN: llvm-symbolizer "DATA tmp.o Z" --output-style=JSON | \
-# RUN: FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
-# INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}]
+# RUN: FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+# INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}]
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll b/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
index 8a99345420fdeba..f82d6704ef8f915 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
+++ b/llvm/test/tools/llvm-symbolizer/output-style-json-frame.ll
@@ -9,8 +9,8 @@
;; Handle invalid argument.
; RUN: llvm-symbolizer "FRAME tmp.o Z" --output-style=JSON | \
-; RUN: FileCheck %s --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
-; INVARG:[{"ModuleName":"tmp.o","Symbol":[{"Column":0,"Discriminator":0,"FileName":"","FunctionName":"","Line":0,"StartAddress":"","StartFileName":"","StartLine":0}]}]
+; RUN: FileCheck %s -DMSG=%errc_ENOENT --check-prefix=INVARG --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+; INVARG:[{"Error":{"Message":"[[MSG]]"},"ModuleName":"tmp.o","SymName":"Z"}]
; RUN: llc -filetype=obj -o %t.o %s
diff --git a/llvm/test/tools/llvm-symbolizer/symbol-search.test b/llvm/test/tools/llvm-symbolizer/symbol-search.test
new file mode 100644
index 000000000000000..634229c2e74c00c
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/symbol-search.test
@@ -0,0 +1,65 @@
+# This test checks the case when an address is specified by a symbol name rather
+# than a number.
+#
+# It uses ELF shared object `Inputs/symbols.so` built for x86_64 using
+# the instructions from `Inputs/symbols.h`.
+
+# Show that the "CODE" command supports search by symbol name.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE-CMD %s
+CODE-CMD: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+
+# Check if a symbol name can be specified on the command-line.
+RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s
+RUN: llvm-symbolizer -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s
+SYMB: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+
+# Check that if the specified symbol name contains a space, everything from the space onwards is ignored.
+RUN: llvm-addr2line -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s
+RUN: llvm-symbolizer -e %p/Inputs/symbols.so "func_01 ignored text" | FileCheck --check-prefix=SYMB %s
+
+# Show that a symbol name may be resolved to more than one location.
+RUN: llvm-addr2line -e %p/Inputs/symbols.so static_func | FileCheck --check-prefix=SYMB-MULTI %s
+SYMB-MULTI: /tmp/dbginfo{{[/\]+}}symbols.part3.c:4
+SYMB-MULTI-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part4.c:4
+
+# Show that if a symbol is not found, a special mark is printed.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT %s
+NONEXISTENT: ??
+
+# Show that more than one symbol may be specified.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s
+FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:10
+
+# Show that C++ mangled names may be specified.
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
+MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:7
+MULTI-CXX: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:5
+
+# Show that containing function name can be printed in mangled form.
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so --no-demangle _Z7func_04i | FileCheck --check-prefix=MANGLED %s
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f _Z7func_04i | FileCheck --check-prefix=MANGLED %s
+MANGLED: _Z7func_04i
+MANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22
+
+# Show that containing function name can be printed in demangled form.
+RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s
+RUN: llvm-addr2line --obj=%p/Inputs/symbols.so -f --demangle _Z7func_04i | FileCheck --check-prefix=NOTMANGLED %s
+NOTMANGLED: func_04(int)
+NOTMANGLED-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:22
+
+# Show that both the symbol and input file can be specified in the search string on the command-line.
+RUN: llvm-addr2line "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s
+RUN: llvm-symbolizer "%p/Inputs/symbols.so func_01" | FileCheck --check-prefix=SYMBIN %s
+SYMBIN: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
+
+# Show that a missing input file specified in the search string on the command-line is handled properly.
+RUN: llvm-addr2line "%p/Inputs/666.so func_01" 2> %t.1.stderr | FileCheck --check-prefix=NONEXISTENT %s
+RUN: FileCheck --input-file=%t.1.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s
+RUN: llvm-symbolizer "%p/Inputs/666.so func_01" 2> %t.2.stderr | FileCheck --check-prefix=NONEXISTENT %s
+RUN: FileCheck --input-file=%t.2.stderr --check-prefix=BINARY-NOT-FOUND -DMSG=%errc_ENOENT %s
+BINARY-NOT-FOUND: error: '{{.*}}666.so': [[MSG]]
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 78a0e6772f3fb36..447c18abadc1743 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -159,7 +159,7 @@ static Error makeStringError(StringRef Msg) {
static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
StringRef InputString, Command &Cmd,
std::string &ModuleName, object::BuildID &BuildID,
- uint64_t &ModuleOffset) {
+ StringRef &Symbol, uint64_t &ModuleOffset) {
ModuleName = BinaryName;
if (InputString.consume_front("CODE ")) {
Cmd = Command::Code;
@@ -224,35 +224,41 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
return makeStringError("no input filename has been specified");
}
- // Parse module offset.
+ // Parse module offset, which can be specified as a number or as a symbol.
InputString = InputString.ltrim();
if (InputString.empty())
return makeStringError("no module offset has been specified");
+
+ // If input string contains a space, ignore everything after it. This behavior
+ // is consistent with GNU addr2line.
int OffsetLength = InputString.find_first_of(" \n\r");
StringRef Offset = InputString.substr(0, OffsetLength);
+
// GNU addr2line assumes the offset is hexadecimal and allows a redundant
// "0x" or "0X" prefix; do the same for compatibility.
if (IsAddr2Line)
Offset.consume_front("0x") || Offset.consume_front("0X");
- // If the input is not a valid module offset, it is not an error, but its
- // lookup does not make sense. Return error of different kind to distinguish
- // from error or success.
- if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset))
- return errorCodeToError(errc::invalid_argument);
+ // If the input is not a number, treat it is a symbol.
+ if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) {
+ Symbol = Offset;
+ ModuleOffset = 0;
+ }
return Error::success();
}
template <typename T>
void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
- uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
- OutputStyle Style, LLVMSymbolizer &Symbolizer,
- DIPrinter &Printer) {
+ StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
+ bool ShouldInline, OutputStyle Style,
+ LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
uint64_t AdjustedOffset = Offset - AdjustVMA;
object::SectionedAddress Address = {AdjustedOffset,
object::SectionedAddress::UndefSection};
- Request SymRequest = {ModuleName, Offset};
+ Request SymRequest = {
+ ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
+ Symbol};
if (Cmd == Command::Data) {
Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
print(SymRequest, ResOrErr, Printer);
@@ -260,6 +266,10 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
Expected<std::vector<DILocal>> ResOrErr =
Symbolizer.symbolizeFrame(ModuleSpec, Address);
print(SymRequest, ResOrErr, Printer);
+ } else if (!Symbol.empty()) {
+ Expected<std::vector<DILineInfo>> ResOrErr =
+ Symbolizer.findSymbol(ModuleSpec, Symbol);
+ print(SymRequest, ResOrErr, Printer);
} else if (ShouldInline) {
Expected<DIInliningInfo> ResOrErr =
Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
@@ -288,7 +298,7 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
}
static void printUnknownLineInfo(std::string ModuleName, DIPrinter &Printer) {
- Request SymRequest = {ModuleName, std::nullopt};
+ Request SymRequest = {ModuleName, std::nullopt, StringRef()};
Printer.print(SymRequest, DILineInfo());
}
@@ -301,16 +311,14 @@ static void symbolizeInput(const opt::InputArgList &Args,
std::string ModuleName;
object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
uint64_t Offset = 0;
+ StringRef Symbol;
if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
StringRef(InputString), Cmd, ModuleName, BuildID,
- Offset)) {
- handleAllErrors(
- std::move(E),
- [&](const StringError &EI) {
- printError(EI, InputString);
- printUnknownLineInfo(ModuleName, Printer);
- },
- [&](const ECError &EI) { printUnknownLineInfo(ModuleName, Printer); });
+ Symbol, Offset)) {
+ handleAllErrors(std::move(E), [&](const StringError &EI) {
+ printError(EI, InputString);
+ printUnknownLineInfo(ModuleName, Printer);
+ });
return;
}
bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
@@ -319,11 +327,11 @@ static void symbolizeInput(const opt::InputArgList &Args,
if (!Args.hasArg(OPT_no_debuginfod))
enableDebuginfod(Symbolizer, Args);
std::string BuildIDStr = toHex(BuildID);
- executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
- Style, Symbolizer, Printer);
+ executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
+ ShouldInline, Style, Symbolizer, Printer);
} else {
- executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
- Style, Symbolizer, Printer);
+ executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
+ ShouldInline, Style, Symbolizer, Printer);
}
}
@@ -527,7 +535,7 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {
if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {
auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());
if (!Status) {
- Request SymRequest = {Arg->getValue(), 0};
+ Request SymRequest = {Arg->getValue(), 0, StringRef()};
handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {
Printer->printError(SymRequest, EI);
});
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 5984be98d798a86..682f79a540cdc5c 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -20,6 +20,7 @@ using ::llvm::DIInliningInfo;
using ::llvm::DILineInfo;
using ::llvm::DILineInfoSpecifier;
using ::llvm::DILocal;
+using ::llvm::StringRef;
using ::llvm::memprof::CallStackMap;
using ::llvm::memprof::Frame;
using ::llvm::memprof::FrameId;
@@ -53,6 +54,9 @@ class MockSymbolizer : public SymbolizableModule {
virtual std::vector<DILocal> symbolizeFrame(SectionedAddress) const {
llvm_unreachable("unused");
}
+ virtual std::vector<SectionedAddress> findSymbol(StringRef Symbol) const {
+ llvm_unreachable("unused");
+ }
virtual bool isWin32Module() const { llvm_unreachable("unused"); }
virtual uint64_t getModulePreferredBase() const {
llvm_unreachable("unused");
More information about the llvm-commits
mailing list