[llvm] [llvm-symbolizer] restore --[no-]use-symbol-table option (PR #71008)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 8 17:49:36 PST 2023
https://github.com/quic-likaid updated https://github.com/llvm/llvm-project/pull/71008
>From 65e16640f185fc237aec0f02116da02bbe7724d2 Mon Sep 17 00:00:00 2001
From: Kevin Ding <quic_likaid at quicinc.com>
Date: Tue, 31 Oct 2023 14:59:14 +0800
Subject: [PATCH 1/2] [llvm-symbolizer] restore --[no-]use-symbol-table option
Sections in relocatable ELFs have their `sh_addr` set to 0. This can
confuse llvm-symbolizer when it tries to use symbol table to get
function name. It may end up with a global variable in the bss section.
This is observed when the symbolizer is used for Linux's dynamically
loadable kernel modules.
The option was unintentionally removed by 593e196, and remained as a
no-op since 3d54976. Adding back the option allows us to prevent the
undesired behaviour.
---
llvm/docs/CommandGuide/llvm-symbolizer.rst | 10 +
.../llvm-symbolizer/no-use-symbol-table.s | 219 ++++++++++++++++++
llvm/tools/llvm-symbolizer/Opts.td | 2 +
.../tools/llvm-symbolizer/llvm-symbolizer.cpp | 3 +-
4 files changed, 233 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 59c0ab6d196ace1..ae12a1471811190 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -314,6 +314,11 @@ OPTIONS
Don't print demangled function names.
+.. option:: --no-use-symbol-table
+
+ Don't prefer function names stored in symbol table to function names in debug
+ info sections.
+
.. option:: --obj <path>, --exe, -e
Path to object file to be symbolized. If ``-`` is specified, read the object
@@ -458,6 +463,11 @@ OPTIONS
of the absolute path. If the command-line to the compiler included
the full path, this will be the same as the default.
+.. option:: --use-symbol-table
+
+ Prefer function names stored in symbol table to function names in debug info
+ sections. This is the default.
+
.. option:: --verbose
Print verbose address, line and column information.
diff --git a/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
new file mode 100644
index 000000000000000..3702de25307ccd7
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
@@ -0,0 +1,219 @@
+# REQUIRES: x86-registered-target
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: llvm-addr2line --no-use-symbol-table -fe %t.o 0x1 | FileCheck %s
+# CHECK: foo
+# CHECK: relocatable.c
+# The above addr2line command prints "b" if --use-symbol-table
+
+# Produced from the following program, compiled with clang -g -S
+# char a;
+# char b;
+# void foo() {}
+
+ .text
+ .file "relocatable.c"
+ .file 0 "/workspaces/llvm-project/llvm/test/tools/llvm-symbolizer/Inputs" "relocatable.c" md5 0xee7a86d561e1ec33352f18460ecf7166
+ .globl foo # -- Begin function foo
+ .p2align 4, 0x90
+ .type foo,@function
+foo: # @foo
+.Lfunc_begin0:
+ .loc 0 6 0 # relocatable.c:6:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp0:
+ .loc 0 6 13 prologue_end # relocatable.c:6:13
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size foo, .Lfunc_end0-foo
+ .cfi_endproc
+ # -- End function
+ .type a,@object # @a
+ .bss
+ .globl a
+a:
+ .byte 0 # 0x0
+ .size a, 1
+
+ .type b,@object # @b
+ .globl b
+b:
+ .byte 0 # 0x0
+ .size b, 1
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x3d DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 12 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .byte 2 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x23:0xb DW_TAG_variable
+ .byte 3 # DW_AT_name
+ .long 46 # DW_AT_type
+ # DW_AT_external
+ .byte 0 # DW_AT_decl_file
+ .byte 3 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 161
+ .byte 0
+ .byte 3 # Abbrev [3] 0x2e:0x4 DW_TAG_base_type
+ .byte 4 # DW_AT_name
+ .byte 6 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 2 # Abbrev [2] 0x32:0xb DW_TAG_variable
+ .byte 5 # DW_AT_name
+ .long 46 # DW_AT_type
+ # DW_AT_external
+ .byte 0 # DW_AT_decl_file
+ .byte 4 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 161
+ .byte 1
+ .byte 4 # Abbrev [4] 0x3d:0xb DW_TAG_subprogram
+ .byte 2 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 6 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 6 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 32 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "Debian clang version 14.0.6" # string offset=0
+.Linfo_string1:
+ .asciz "relocatable.c" # string offset=28
+.Linfo_string2:
+ .asciz "/workspaces/llvm-project/llvm/test/tools/llvm-symbolizer/Inputs" # string offset=42
+.Linfo_string3:
+ .asciz "a" # string offset=106
+.Linfo_string4:
+ .asciz "char" # string offset=108
+.Linfo_string5:
+ .asciz "b" # string offset=113
+.Linfo_string6:
+ .asciz "foo" # string offset=115
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .long .Linfo_string5
+ .long .Linfo_string6
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad a
+ .quad b
+ .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+ .ident "Debian clang version 14.0.6"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td
index edc80bfe59673ba..02af595cf1f3139 100644
--- a/llvm/tools/llvm-symbolizer/Opts.td
+++ b/llvm/tools/llvm-symbolizer/Opts.td
@@ -57,6 +57,8 @@ def relative_address : F<"relative-address", "Interpret addresses as addresses r
def relativenames : F<"relativenames", "Strip the compilation directory from paths">;
defm untag_addresses : B<"untag-addresses", "", "Remove memory tags from addresses before symbolization">;
def use_dia: F<"dia", "Use the DIA library to access symbols (Windows only)">;
+defm use_symbol_table : B<"use-symbol-table", "Prefer function names stored in symbol table",
+ "Don't prefer function names stored in symbol table">;
def verbose : F<"verbose", "Print verbose line info">;
def version : F<"version", "Display the version">;
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 447c18abadc1743..07bb896389c9bcd 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -477,7 +477,8 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {
Opts.UseDIA = false;
}
#endif
- Opts.UseSymbolTable = true;
+ Opts.UseSymbolTable =
+ Args.hasFlag(OPT_use_symbol_table, OPT_no_use_symbol_table, true);
if (Args.hasArg(OPT_cache_size_EQ))
parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
Config.PrintAddress = Args.hasArg(OPT_addresses);
>From 15465b4e112062569b8138e5862a1786a93349ae Mon Sep 17 00:00:00 2001
From: Kevin Ding <quic_likaid at quicinc.com>
Date: Thu, 9 Nov 2023 09:48:01 +0800
Subject: [PATCH 2/2] update test
---
...-use-symbol-table.s => use-symbol-table.s} | 27 +++++++++++++------
1 file changed, 19 insertions(+), 8 deletions(-)
rename llvm/test/tools/llvm-symbolizer/{no-use-symbol-table.s => use-symbol-table.s} (93%)
diff --git a/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
similarity index 93%
rename from llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
rename to llvm/test/tools/llvm-symbolizer/use-symbol-table.s
index 3702de25307ccd7..e1be28c14b7032f 100644
--- a/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
+++ b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
@@ -1,15 +1,26 @@
# REQUIRES: x86-registered-target
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: llvm-addr2line --no-use-symbol-table -fe %t.o 0x1 | FileCheck %s
-# CHECK: foo
-# CHECK: relocatable.c
-# The above addr2line command prints "b" if --use-symbol-table
+# RUN: llvm-addr2line --no-use-symbol-table -f -e %t.o 0x1 | FileCheck %s --check-prefix=OFF
+# OFF: foo
+# OFF-NEXT: relocatable.c
-# Produced from the following program, compiled with clang -g -S
-# char a;
-# char b;
-# void foo() {}
+## Produced from the following program, compiled with clang -g -S
+## (clang 14.0.6 / Debian 12).
+## char a;
+## char b;
+## void foo() {}
+
+## nm use-symbol-table.s
+## 0000000000000000 B a
+## 0000000000000001 B b
+## 0000000000000000 T foo
+
+## With --use-symbol-table (default), the symbolizer tries to use the symbol
+## table to override the function name from DWARF. In this case, "b" is returned.
+# RUN: llvm-addr2line --use-symbol-table -f -e %t.o 0x1 | FileCheck %s --check-prefix=ON
+# ON: b
+# ON-NEXT: relocatable.c
.text
.file "relocatable.c"
More information about the llvm-commits
mailing list