[llvm] 9a709dd - llvm-addr2line: assume addresses on the command line are hexadecimal rather than attempting to guess the base based on the form of the number.
Richard Smith via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 16 16:16:29 PDT 2020
Author: Richard Smith
Date: 2020-04-16T16:16:21-07:00
New Revision: 9a709dd2bb452883e1f1cf626d60c3f03801a9f3
URL: https://github.com/llvm/llvm-project/commit/9a709dd2bb452883e1f1cf626d60c3f03801a9f3
DIFF: https://github.com/llvm/llvm-project/commit/9a709dd2bb452883e1f1cf626d60c3f03801a9f3.diff
LOG: llvm-addr2line: assume addresses on the command line are hexadecimal rather than attempting to guess the base based on the form of the number.
Summary:
This matches the behavior of GNU addr2line. We previously treated
hexadecimal addresses as binary if they started with 0b, otherwise as
octal if they started with 0, otherwise as decimal.
This only affects llvm-addr2line; the behavior of llvm-symbolize is
unaffected.
Reviewers: ikudrin, rupprecht, jhenderson
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73306
Added:
llvm/test/tools/llvm-symbolizer/input-base.test
Modified:
llvm/docs/CommandGuide/llvm-addr2line.rst
llvm/test/lit.cfg.py
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
Removed:
################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-addr2line.rst b/llvm/docs/CommandGuide/llvm-addr2line.rst
index 27b0fd9a947b..646a159cd24a 100644
--- a/llvm/docs/CommandGuide/llvm-addr2line.rst
+++ b/llvm/docs/CommandGuide/llvm-addr2line.rst
@@ -17,6 +17,11 @@ GNU's :program:`addr2line`.
Here are some of those differences:
+- ``llvm-addr2line`` interprets all addresses as hexadecimal and ignores an
+ optional ``0x`` prefix, whereas ``llvm-symbolizer`` attempts to determine
+ the base from the literal's prefix and defaults to decimal if there is no
+ prefix.
+
- ``llvm-addr2line`` defaults not to print function names. Use `-f`_ to enable
that.
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index d41798248072..49d345566b65 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -146,10 +146,10 @@ def get_asan_rtlib():
# FIXME: Why do we have both `lli` and `%lli` that do slightly different things?
tools.extend([
'dsymutil', 'lli', 'lli-child-target', 'llvm-ar', 'llvm-as',
- 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov', 'llvm-cxxdump', 'llvm-cvtres',
- 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump', 'llvm-exegesis', 'llvm-extract',
- 'llvm-isel-fuzzer', 'llvm-ifs', 'llvm-install-name-tool',
- 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib',
+ 'llvm-addr2line', 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov',
+ 'llvm-cxxdump', 'llvm-cvtres', 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump',
+ 'llvm-exegesis', 'llvm-extract', 'llvm-isel-fuzzer', 'llvm-ifs',
+ 'llvm-install-name-tool', 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib',
'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca',
'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump',
'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf',
diff --git a/llvm/test/tools/llvm-symbolizer/input-base.test b/llvm/test/tools/llvm-symbolizer/input-base.test
new file mode 100644
index 000000000000..66244a7203c0
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/input-base.test
@@ -0,0 +1,33 @@
+# llvm-symbolizer infers the number base from the form of the address.
+RUN: llvm-symbolizer -e /dev/null -a 0x1234 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0X1234 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 4660 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 011064 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0b1001000110100 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0B1001000110100 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0o11064 | FileCheck %s
+
+# llvm-symbolizer / StringRef::getAsInteger only accepts the 0o prefix in lowercase.
+RUN: llvm-symbolizer -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
+
+# llvm-addr2line always requires hexadecimal, but accepts an optional 0x prefix.
+RUN: llvm-addr2line -e /dev/null -a 0x1234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 0X1234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 1234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 01234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 0b1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
+RUN: llvm-addr2line -e /dev/null -a 0B1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
+RUN: llvm-addr2line -e /dev/null -a 0o1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-LOWER
+RUN: llvm-addr2line -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
+
+CHECK: 0x1234
+CHECK-NEXT: ??
+
+HEXADECIMAL-NOT-BINARY: 0xb1010
+HEXADECIMAL-NOT-BINARY: ??
+
+INVALID-NOT-OCTAL-LOWER: 0o1234
+INVALID-NOT-OCTAL-LOWER-NOT: ??
+
+INVALID-NOT-OCTAL-UPPER: 0O1234
+INVALID-NOT-OCTAL-UPPER-NOT: ??
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 8d46bd2cb627..bb282a52b331 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -181,7 +181,7 @@ enum class Command {
Frame,
};
-static bool parseCommand(StringRef InputString, Command &Cmd,
+static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd,
std::string &ModuleName, uint64_t &ModuleOffset) {
const char kDelimiters[] = " \n\r";
ModuleName = "";
@@ -218,15 +218,21 @@ static bool parseCommand(StringRef InputString, Command &Cmd,
// Skip delimiters and parse module offset.
Pos += strspn(Pos, kDelimiters);
int OffsetLength = strcspn(Pos, kDelimiters);
- return !StringRef(Pos, OffsetLength).getAsInteger(0, ModuleOffset);
+ StringRef Offset(Pos, OffsetLength);
+ // GNU addr2line assumes the offset is hexadecimal and allows a redundant
+ // "0x" or "0X" prefix; do the same for compatibility.
+ if (IsAddr2Line)
+ Offset.consume_front("0x") || Offset.consume_front("0X");
+ return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
}
-static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
- DIPrinter &Printer) {
+static void symbolizeInput(bool IsAddr2Line, StringRef InputString,
+ LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
Command Cmd;
std::string ModuleName;
uint64_t Offset = 0;
- if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
+ if (!parseCommand(IsAddr2Line, StringRef(InputString), Cmd, ModuleName,
+ Offset)) {
outs() << InputString << "\n";
return;
}
@@ -340,12 +346,12 @@ int main(int argc, char **argv) {
std::remove_if(StrippedInputString.begin(), StrippedInputString.end(),
[](char c) { return c == '\r' || c == '\n'; }),
StrippedInputString.end());
- symbolizeInput(StrippedInputString, Symbolizer, Printer);
+ symbolizeInput(IsAddr2Line, StrippedInputString, Symbolizer, Printer);
outs().flush();
}
} else {
for (StringRef Address : ClInputAddresses)
- symbolizeInput(Address, Symbolizer, Printer);
+ symbolizeInput(IsAddr2Line, Address, Symbolizer, Printer);
}
return 0;
More information about the llvm-commits
mailing list