[llvm] [llvm-symbolizer] restore --[no-]use-symbol-table option (PR #71008)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 12 17:34:45 PST 2023

https://github.com/quic-likaid updated https://github.com/llvm/llvm-project/pull/71008

>From 65e16640f185fc237aec0f02116da02bbe7724d2 Mon Sep 17 00:00:00 2001
From: Kevin Ding <quic_likaid at quicinc.com>
Date: Tue, 31 Oct 2023 14:59:14 +0800
Subject: [PATCH 1/3] [llvm-symbolizer] restore --[no-]use-symbol-table option

Sections in relocatable ELF files have their `sh_addr` set to 0. This can
confuse llvm-symbolizer when it tries to use the symbol table to get the
function name: it may end up reporting a global variable in the .bss
section instead. This is observed when the symbolizer is used on Linux's
dynamically loadable kernel modules.

The option was unintentionally removed by 593e196, and remained as a
no-op since 3d54976. Adding back the option allows us to prevent the
undesired behaviour.
 llvm/docs/CommandGuide/llvm-symbolizer.rst    |  10 +
 .../llvm-symbolizer/no-use-symbol-table.s     | 219 ++++++++++++++++++
 llvm/tools/llvm-symbolizer/Opts.td            |   2 +
 .../tools/llvm-symbolizer/llvm-symbolizer.cpp |   3 +-
 4 files changed, 233 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s

diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 59c0ab6d196ace1..ae12a1471811190 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -314,6 +314,11 @@ OPTIONS
   Don't print demangled function names.
+.. option:: --no-use-symbol-table
+  Don't prefer function names stored in symbol table to function names in debug
+  info sections.
 .. option:: --obj <path>, --exe, -e
   Path to object file to be symbolized. If ``-`` is specified, read the object
@@ -458,6 +463,11 @@ OPTIONS
   of the absolute path. If the command-line to the compiler included
   the full path, this will be the same as the default.
+.. option:: --use-symbol-table
+  Prefer function names stored in symbol table to function names in debug info
+  sections. This is the default.
 .. option:: --verbose
   Print verbose address, line and column information.
diff --git a/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
new file mode 100644
index 000000000000000..3702de25307ccd7
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
@@ -0,0 +1,219 @@
+# REQUIRES: x86-registered-target
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: llvm-addr2line --no-use-symbol-table -fe %t.o 0x1 | FileCheck %s
+# CHECK: foo
+# CHECK: relocatable.c
+# The above addr2line command prints "b" if --use-symbol-table
+# Produced from the following program, compiled with clang -g -S
+# char a;
+# char b;
+# void foo() {}
+	.text
+	.file	"relocatable.c"
+	.file	0 "/workspaces/llvm-project/llvm/test/tools/llvm-symbolizer/Inputs" "relocatable.c" md5 0xee7a86d561e1ec33352f18460ecf7166
+	.globl	foo                             # -- Begin function foo
+	.p2align	4, 0x90
+	.type	foo,@function
+foo:                                    # @foo
+	.loc	0 6 0                           # relocatable.c:6:0
+	.cfi_startproc
+# %bb.0:
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+	.loc	0 6 13 prologue_end             # relocatable.c:6:13
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+	.size	foo, .Lfunc_end0-foo
+	.cfi_endproc
+                                        # -- End function
+	.type	a,@object                       # @a
+	.bss
+	.globl	a
+	.byte	0                               # 0x0
+	.size	a, 1
+	.type	b,@object                       # @b
+	.globl	b
+	.byte	0                               # 0x0
+	.size	b, 1
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	37                              # DW_FORM_strx1
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	37                              # DW_FORM_strx1
+	.byte	17                              # DW_AT_low_pc
+	.byte	27                              # DW_FORM_addrx
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	115                             # DW_AT_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	52                              # DW_TAG_variable
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	0                               # DW_CHILDREN_no
+	.byte	17                              # DW_AT_low_pc
+	.byte	27                              # DW_FORM_addrx
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+	.short	5                               # DWARF version number
+	.byte	1                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	1                               # Abbrev [1] 0xc:0x3d DW_TAG_compile_unit
+	.byte	0                               # DW_AT_producer
+	.short	12                              # DW_AT_language
+	.byte	1                               # DW_AT_name
+	.long	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.byte	2                               # DW_AT_comp_dir
+	.byte	2                               # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.long	.Laddr_table_base0              # DW_AT_addr_base
+	.byte	2                               # Abbrev [2] 0x23:0xb DW_TAG_variable
+	.byte	3                               # DW_AT_name
+	.long	46                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	0                               # DW_AT_decl_file
+	.byte	3                               # DW_AT_decl_line
+	.byte	2                               # DW_AT_location
+	.byte	161
+	.byte	0
+	.byte	3                               # Abbrev [3] 0x2e:0x4 DW_TAG_base_type
+	.byte	4                               # DW_AT_name
+	.byte	6                               # DW_AT_encoding
+	.byte	1                               # DW_AT_byte_size
+	.byte	2                               # Abbrev [2] 0x32:0xb DW_TAG_variable
+	.byte	5                               # DW_AT_name
+	.long	46                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	0                               # DW_AT_decl_file
+	.byte	4                               # DW_AT_decl_line
+	.byte	2                               # DW_AT_location
+	.byte	161
+	.byte	1
+	.byte	4                               # Abbrev [4] 0x3d:0xb DW_TAG_subprogram
+	.byte	2                               # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	86
+	.byte	6                               # DW_AT_name
+	.byte	0                               # DW_AT_decl_file
+	.byte	6                               # DW_AT_decl_line
+                                        # DW_AT_external
+	.byte	0                               # End Of Children Mark
+	.section	.debug_str_offsets,"",@progbits
+	.long	32                              # Length of String Offsets Set
+	.short	5
+	.short	0
+	.section	.debug_str,"MS",@progbits,1
+	.asciz	"Debian clang version 14.0.6"   # string offset=0
+	.asciz	"relocatable.c"                 # string offset=28
+	.asciz	"/workspaces/llvm-project/llvm/test/tools/llvm-symbolizer/Inputs" # string offset=42
+	.asciz	"a"                             # string offset=106
+	.asciz	"char"                          # string offset=108
+	.asciz	"b"                             # string offset=113
+	.asciz	"foo"                           # string offset=115
+	.section	.debug_str_offsets,"",@progbits
+	.long	.Linfo_string0
+	.long	.Linfo_string1
+	.long	.Linfo_string2
+	.long	.Linfo_string3
+	.long	.Linfo_string4
+	.long	.Linfo_string5
+	.long	.Linfo_string6
+	.section	.debug_addr,"",@progbits
+	.long	.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+	.short	5                               # DWARF version number
+	.byte	8                               # Address size
+	.byte	0                               # Segment selector size
+	.quad	a
+	.quad	b
+	.quad	.Lfunc_begin0
+	.ident	"Debian clang version 14.0.6"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.section	.debug_line,"",@progbits
diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td
index edc80bfe59673ba..02af595cf1f3139 100644
--- a/llvm/tools/llvm-symbolizer/Opts.td
+++ b/llvm/tools/llvm-symbolizer/Opts.td
@@ -57,6 +57,8 @@ def relative_address : F<"relative-address", "Interpret addresses as addresses r
 def relativenames : F<"relativenames", "Strip the compilation directory from paths">;
 defm untag_addresses : B<"untag-addresses", "", "Remove memory tags from addresses before symbolization">;
 def use_dia: F<"dia", "Use the DIA library to access symbols (Windows only)">;
+defm use_symbol_table : B<"use-symbol-table", "Prefer function names stored in symbol table",
+                          "Don't prefer function names stored in symbol table">;
 def verbose : F<"verbose", "Print verbose line info">;
 def version : F<"version", "Display the version">;
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 447c18abadc1743..07bb896389c9bcd 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -477,7 +477,8 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {
     Opts.UseDIA = false;
-  Opts.UseSymbolTable = true;
+  Opts.UseSymbolTable =
+      Args.hasFlag(OPT_use_symbol_table, OPT_no_use_symbol_table, true);
   if (Args.hasArg(OPT_cache_size_EQ))
     parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
   Config.PrintAddress = Args.hasArg(OPT_addresses);

>From 15465b4e112062569b8138e5862a1786a93349ae Mon Sep 17 00:00:00 2001
From: Kevin Ding <quic_likaid at quicinc.com>
Date: Thu, 9 Nov 2023 09:48:01 +0800
Subject: [PATCH 2/3] update test

 ...-use-symbol-table.s => use-symbol-table.s} | 27 +++++++++++++------
 1 file changed, 19 insertions(+), 8 deletions(-)
 rename llvm/test/tools/llvm-symbolizer/{no-use-symbol-table.s => use-symbol-table.s} (93%)

diff --git a/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
similarity index 93%
rename from llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
rename to llvm/test/tools/llvm-symbolizer/use-symbol-table.s
index 3702de25307ccd7..e1be28c14b7032f 100644
--- a/llvm/test/tools/llvm-symbolizer/no-use-symbol-table.s
+++ b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
@@ -1,15 +1,26 @@
 # REQUIRES: x86-registered-target
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: llvm-addr2line --no-use-symbol-table -fe %t.o 0x1 | FileCheck %s
-# CHECK: foo
-# CHECK: relocatable.c
-# The above addr2line command prints "b" if --use-symbol-table
+# RUN: llvm-addr2line --no-use-symbol-table -f -e %t.o 0x1 | FileCheck %s --check-prefix=OFF
+# OFF: foo
+# OFF-NEXT: relocatable.c
-# Produced from the following program, compiled with clang -g -S
-# char a;
-# char b;
-# void foo() {}
+## Produced from the following program, compiled with clang -g -S
+## (clang 14.0.6 / Debian 12).
+## char a;
+## char b;
+## void foo() {}
+## nm use-symbol-table.s
+## 0000000000000000 B a
+## 0000000000000001 B b
+## 0000000000000000 T foo
+## With --use-symbol-table (default), the symbolizer tries to use the symbol
+## table to override the function name from DWARF. In this case, "b" is returned.
+# RUN: llvm-addr2line --use-symbol-table -f -e %t.o 0x1 | FileCheck %s --check-prefix=ON
+# ON: b
+# ON-NEXT: relocatable.c
 	.file	"relocatable.c"

>From cb4892f9f78c3ebf3c1c023d60effc28ac352ed8 Mon Sep 17 00:00:00 2001
From: Kevin Ding <quic_likaid at quicinc.com>
Date: Mon, 13 Nov 2023 09:34:04 +0800
Subject: [PATCH 3/3] update test 2

 .../tools/llvm-symbolizer/use-symbol-table.s    | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/llvm/test/tools/llvm-symbolizer/use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
index e1be28c14b7032f..a9127de7c745b5c 100644
--- a/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
+++ b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s
@@ -5,23 +5,18 @@
 # OFF: foo
 # OFF-NEXT: relocatable.c
-## Produced from the following program, compiled with clang -g -S
-## (clang 14.0.6 / Debian 12).
-## char a;
-## char b;
-## void foo() {}
-## nm use-symbol-table.s
-## 0000000000000000 B a
-## 0000000000000001 B b
-## 0000000000000000 T foo
 ## With --use-symbol-table (default), the symbolizer tries to use the symbol
 ## table to override the function name from DWARF. In this case, "b" is returned.
 # RUN: llvm-addr2line --use-symbol-table -f -e %t.o 0x1 | FileCheck %s --check-prefix=ON
+# RUN: llvm-addr2line -f -e %t.o 0x1 | FileCheck %s --check-prefix=ON
 # ON: b
 # ON-NEXT: relocatable.c
+## Produced from the following program, compiled with clang -g -S (clang 14.0.6 / Debian 12).
+## char a;
+## char b;
+## void foo() {}
 	.file	"relocatable.c"
 	.file	0 "/workspaces/llvm-project/llvm/test/tools/llvm-symbolizer/Inputs" "relocatable.c" md5 0xee7a86d561e1ec33352f18460ecf7166

More information about the llvm-commits mailing list