[llvm] 9a709dd - llvm-addr2line: assume addresses on the command line are hexadecimal rather than attempting to guess the base based on the form of the number.

Richard Smith via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 16 16:16:29 PDT 2020


Author: Richard Smith
Date: 2020-04-16T16:16:21-07:00
New Revision: 9a709dd2bb452883e1f1cf626d60c3f03801a9f3

URL: https://github.com/llvm/llvm-project/commit/9a709dd2bb452883e1f1cf626d60c3f03801a9f3
DIFF: https://github.com/llvm/llvm-project/commit/9a709dd2bb452883e1f1cf626d60c3f03801a9f3.diff

LOG: llvm-addr2line: assume addresses on the command line are hexadecimal rather than attempting to guess the base based on the form of the number.

Summary:
This matches the behavior of GNU addr2line. We previously treated
hexadecimal addresses as binary if they started with 0b, otherwise as
octal if they started with 0, otherwise as decimal.

This only affects llvm-addr2line; the behavior of llvm-symbolize is
unaffected.

Reviewers: ikudrin, rupprecht, jhenderson

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73306

Added: 
    llvm/test/tools/llvm-symbolizer/input-base.test

Modified: 
    llvm/docs/CommandGuide/llvm-addr2line.rst
    llvm/test/lit.cfg.py
    llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-addr2line.rst b/llvm/docs/CommandGuide/llvm-addr2line.rst
index 27b0fd9a947b..646a159cd24a 100644
--- a/llvm/docs/CommandGuide/llvm-addr2line.rst
+++ b/llvm/docs/CommandGuide/llvm-addr2line.rst
@@ -17,6 +17,11 @@ GNU's :program:`addr2line`.
 
 Here are some of those differences:
 
+-  ``llvm-addr2line`` interprets all addresses as hexadecimal and ignores an
+   optional ``0x`` prefix, whereas ``llvm-symbolizer`` attempts to determine
+   the base from the literal's prefix and defaults to decimal if there is no
+   prefix.
+
 -  ``llvm-addr2line`` defaults not to print function names. Use `-f`_ to enable
    that.
 

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index d41798248072..49d345566b65 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -146,10 +146,10 @@ def get_asan_rtlib():
 # FIXME: Why do we have both `lli` and `%lli` that do slightly different things?
 tools.extend([
     'dsymutil', 'lli', 'lli-child-target', 'llvm-ar', 'llvm-as',
-    'llvm-bcanalyzer', 'llvm-config', 'llvm-cov', 'llvm-cxxdump', 'llvm-cvtres',
-    'llvm-diff', 'llvm-dis', 'llvm-dwarfdump', 'llvm-exegesis', 'llvm-extract',
-    'llvm-isel-fuzzer', 'llvm-ifs', 'llvm-install-name-tool',
-    'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib',
+    'llvm-addr2line', 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov',
+    'llvm-cxxdump', 'llvm-cvtres', 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump',
+    'llvm-exegesis', 'llvm-extract', 'llvm-isel-fuzzer', 'llvm-ifs',
+    'llvm-install-name-tool', 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib',
     'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca',
     'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump',
     'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf',

diff --git a/llvm/test/tools/llvm-symbolizer/input-base.test b/llvm/test/tools/llvm-symbolizer/input-base.test
new file mode 100644
index 000000000000..66244a7203c0
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/input-base.test
@@ -0,0 +1,33 @@
+# llvm-symbolizer infers the number base from the form of the address.
+RUN: llvm-symbolizer -e /dev/null -a 0x1234 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0X1234 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 4660 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 011064 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0b1001000110100 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0B1001000110100 | FileCheck %s
+RUN: llvm-symbolizer -e /dev/null -a 0o11064 | FileCheck %s
+
+# llvm-symbolizer / StringRef::getAsInteger only accepts the 0o prefix in lowercase.
+RUN: llvm-symbolizer -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
+
+# llvm-addr2line always requires hexadecimal, but accepts an optional 0x prefix.
+RUN: llvm-addr2line -e /dev/null -a 0x1234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 0X1234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 1234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 01234 | FileCheck %s
+RUN: llvm-addr2line -e /dev/null -a 0b1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
+RUN: llvm-addr2line -e /dev/null -a 0B1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
+RUN: llvm-addr2line -e /dev/null -a 0o1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-LOWER
+RUN: llvm-addr2line -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
+
+CHECK: 0x1234
+CHECK-NEXT: ??
+
+HEXADECIMAL-NOT-BINARY: 0xb1010
+HEXADECIMAL-NOT-BINARY: ??
+
+INVALID-NOT-OCTAL-LOWER: 0o1234
+INVALID-NOT-OCTAL-LOWER-NOT: ??
+
+INVALID-NOT-OCTAL-UPPER: 0O1234
+INVALID-NOT-OCTAL-UPPER-NOT: ??

diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 8d46bd2cb627..bb282a52b331 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -181,7 +181,7 @@ enum class Command {
   Frame,
 };
 
-static bool parseCommand(StringRef InputString, Command &Cmd,
+static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd,
                          std::string &ModuleName, uint64_t &ModuleOffset) {
   const char kDelimiters[] = " \n\r";
   ModuleName = "";
@@ -218,15 +218,21 @@ static bool parseCommand(StringRef InputString, Command &Cmd,
   // Skip delimiters and parse module offset.
   Pos += strspn(Pos, kDelimiters);
   int OffsetLength = strcspn(Pos, kDelimiters);
-  return !StringRef(Pos, OffsetLength).getAsInteger(0, ModuleOffset);
+  StringRef Offset(Pos, OffsetLength);
+  // GNU addr2line assumes the offset is hexadecimal and allows a redundant
+  // "0x" or "0X" prefix; do the same for compatibility.
+  if (IsAddr2Line)
+    Offset.consume_front("0x") || Offset.consume_front("0X");
+  return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
 }
 
-static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
-                           DIPrinter &Printer) {
+static void symbolizeInput(bool IsAddr2Line, StringRef InputString,
+                           LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
   Command Cmd;
   std::string ModuleName;
   uint64_t Offset = 0;
-  if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
+  if (!parseCommand(IsAddr2Line, StringRef(InputString), Cmd, ModuleName,
+                    Offset)) {
     outs() << InputString << "\n";
     return;
   }
@@ -340,12 +346,12 @@ int main(int argc, char **argv) {
           std::remove_if(StrippedInputString.begin(), StrippedInputString.end(),
                          [](char c) { return c == '\r' || c == '\n'; }),
           StrippedInputString.end());
-      symbolizeInput(StrippedInputString, Symbolizer, Printer);
+      symbolizeInput(IsAddr2Line, StrippedInputString, Symbolizer, Printer);
       outs().flush();
     }
   } else {
     for (StringRef Address : ClInputAddresses)
-      symbolizeInput(Address, Symbolizer, Printer);
+      symbolizeInput(IsAddr2Line, Address, Symbolizer, Printer);
   }
 
   return 0;


        


More information about the llvm-commits mailing list