[lld] [ELF] Improve undefined symbol message w/ DW_TAG_variable but w/o line number information (PR #70854)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 13:27:39 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
Author: Fangrui Song (MaskRay)
<details>
<summary>Changes</summary>
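The undefined symbol message suggests the source line when line number
information is available (see https://reviews.llvm.org/D31481).
When the undefined symbol is referenced from a global variable (for
example, from its initializer), line number information is unavailable
for that location, so the message cannot suggest a source line.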
```
extern int undef;
namespace ns {
// DW_TAG_variable(DW_AT_decl_file/DW_AT_decl_line) is available while
// line number information is unavailable.
int *var[] = {
&undef
};
}
```
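This patch refactors `getEnclosingFunction` into a more general
`getEnclosingSymbol`, so that the symbol enclosing the reference (here, the
variable) can be found and its DW_TAG_variable declaration file/line
(DW_AT_decl_file/DW_AT_decl_line) can be used in the message instead.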
---
Full diff: https://github.com/llvm/llvm-project/pull/70854.diff
6 Files Affected:
- (modified) lld/ELF/InputSection.cpp (+6-8)
- (modified) lld/ELF/InputSection.h (+6-3)
- (modified) lld/ELF/Relocations.cpp (+2-1)
- (modified) lld/test/ELF/Inputs/undef-debug.s (+1)
- (added) lld/test/ELF/Inputs/undef-debug2.s (+200)
- (modified) lld/test/ELF/undef.s (+7-2)
``````````diff
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 02394cbae95d557..e4ce050a789dfbb 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -241,12 +241,12 @@ InputSection *InputSectionBase::getLinkOrderDep() const {
return cast<InputSection>(file->getSections()[link]);
}
-// Find a function symbol that encloses a given location.
-Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) {
+// Find a symbol that encloses a given location.
+Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) {
for (Symbol *b : file->getSymbols())
if (Defined *d = dyn_cast<Defined>(b))
- if (d->section == this && d->type == STT_FUNC && d->value <= offset &&
- offset < d->value + d->size)
+ if (d->section == this && d->value <= offset &&
+ offset < d->value + d->size && (type == 0 || type == d->type))
return d;
return nullptr;
}
@@ -296,10 +296,8 @@ std::string InputSectionBase::getObjMsg(uint64_t off) {
// Find a symbol that encloses a given location. getObjMsg may be called
// before ObjFile::initSectionsAndLocalSyms where local symbols are
// initialized.
- for (Symbol *b : file->getSymbols())
- if (auto *d = dyn_cast_or_null<Defined>(b))
- if (d->section == this && d->value <= off && off < d->value + d->size)
- return filename + ":(" + toString(*d) + ")" + archive;
+ if (Defined *d = getEnclosingSymbol(off))
+ return filename + ":(" + toString(*d) + ")" + archive;
// If there's no symbol, print out the offset in the section.
return (filename + ":(" + name + "+0x" + utohexstr(off) + ")" + archive)
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 2b91711abba3d14..7570901b4ef9425 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -189,9 +189,12 @@ class InputSectionBase : public SectionBase {
InputSection *getLinkOrderDep() const;
- // Get the function symbol that encloses this offset from within the
- // section.
- Defined *getEnclosingFunction(uint64_t offset);
+ // Get a symbol that encloses this offset from within the section. If type is
+ // not zero, return a symbol with the specified type.
+ Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0);
+ Defined *getEnclosingFunction(uint64_t offset) {
+ return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC);
+ }
// Returns a source location string. Used to construct an error message.
std::string getLocation(uint64_t offset);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index f3fb0c71a8b3064..62e80521c4558ee 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -739,7 +739,8 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef,
uint64_t offset = l.offset;
msg += "\n>>> referenced by ";
- std::string src = sec.getSrcMsg(sym, offset);
+ Symbol *enclosing = sec.getEnclosingSymbol(offset);
+ std::string src = sec.getSrcMsg(enclosing ? *enclosing : sym, offset);
if (!src.empty())
msg += src + "\n>>> ";
msg += sec.getObjMsg(offset);
diff --git a/lld/test/ELF/Inputs/undef-debug.s b/lld/test/ELF/Inputs/undef-debug.s
index 46c1c92d2b1f6e8..016a0cac60e5eaf 100644
--- a/lld/test/ELF/Inputs/undef-debug.s
+++ b/lld/test/ELF/Inputs/undef-debug.s
@@ -1,3 +1,4 @@
+## Variables with line number information
.file 1 "dir/undef-debug.s"
.loc 1 3
.quad zed3
diff --git a/lld/test/ELF/Inputs/undef-debug2.s b/lld/test/ELF/Inputs/undef-debug2.s
new file mode 100644
index 000000000000000..e176969dc4e3ffc
--- /dev/null
+++ b/lld/test/ELF/Inputs/undef-debug2.s
@@ -0,0 +1,200 @@
+## Generate from:
+##
+## extern int zed9;
+## namespace ns {
+## int *var[] = {
+## &zed9
+## };
+## }
+ .text
+ .file "undef-debug2.cc"
+ .file 0 "dir" "undef-debug2.cc" md5 0xd7caefb836c47f6c56303f19e96f2587
+ .type _ZN2ns3varE,@object # @_ZN2ns3varE
+ .data
+ .globl _ZN2ns3varE
+ .p2align 3, 0x0
+_ZN2ns3varE:
+ .quad zed9
+ .size _ZN2ns3varE, 8
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 57 # DW_TAG_namespace
+ .byte 1 # DW_CHILDREN_yes
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 1 # DW_TAG_array_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 33 # DW_TAG_subrange_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 55 # DW_AT_count
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 6 # Abbreviation Code
+ .byte 15 # DW_TAG_pointer_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 7 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 8 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x3b DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x1e:0xf DW_TAG_namespace
+ .byte 3 # DW_AT_name
+ .byte 3 # Abbrev [3] 0x20:0xc DW_TAG_variable
+ .byte 4 # DW_AT_name
+ .long 45 # DW_AT_type
+ # DW_AT_external
+ .byte 0 # DW_AT_decl_file
+ .byte 3 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 161
+ .byte 0
+ .byte 7 # DW_AT_linkage_name
+ .byte 0 # End Of Children Mark
+ .byte 4 # Abbrev [4] 0x2d:0xc DW_TAG_array_type
+ .long 57 # DW_AT_type
+ .byte 5 # Abbrev [5] 0x32:0x6 DW_TAG_subrange_type
+ .long 66 # DW_AT_type
+ .byte 1 # DW_AT_count
+ .byte 0 # End Of Children Mark
+ .byte 6 # Abbrev [6] 0x39:0x5 DW_TAG_pointer_type
+ .long 62 # DW_AT_type
+ .byte 7 # Abbrev [7] 0x3e:0x4 DW_TAG_base_type
+ .byte 5 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 8 # Abbrev [8] 0x42:0x4 DW_TAG_base_type
+ .byte 6 # DW_AT_name
+ .byte 8 # DW_AT_byte_size
+ .byte 7 # DW_AT_encoding
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 36 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 18.0.0" # string offset=0
+.Linfo_string1:
+ .asciz "undef-debug2.cc" # string offset=21
+.Linfo_string2:
+ .asciz "dir" # string offset=37
+.Linfo_string3:
+ .asciz "ns" # string offset=44
+.Linfo_string4:
+ .asciz "var" # string offset=47
+.Linfo_string5:
+ .asciz "int" # string offset=51
+.Linfo_string6:
+ .asciz "__ARRAY_SIZE_TYPE__" # string offset=55
+.Linfo_string7:
+ .asciz "_ZN2ns3varE" # string offset=75
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .long .Linfo_string5
+ .long .Linfo_string6
+ .long .Linfo_string7
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad _ZN2ns3varE
+.Ldebug_addr_end0:
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s
index 2b42ae12be2c129..009b8bf6d23b410 100644
--- a/lld/test/ELF/undef.s
+++ b/lld/test/ELF/undef.s
@@ -3,11 +3,12 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-debug.s -o %t3.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-debug2.s -o %t5.o
# RUN: rm -f %t2.a
# RUN: llvm-ar rc %t2.a %t2.o
-# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o %t5.o -o /dev/null 2>&1 \
# RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
-# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o %t5.o -o /dev/null 2>&1 \
# RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
# CHECK: error: undefined symbol: foo
@@ -82,6 +83,10 @@
# CHECK-NEXT: >>> referenced by undef-bad-debug2.s:11 (dir2{{/|\\}}undef-bad-debug2.s:11)
# CHECK-NEXT: >>> {{.*}}tmp4.o:(.text+0x18)
+# CHECK: error: undefined symbol: zed9
+# CHECK-NEXT: >>> referenced by undef-debug2.cc:3 (dir{{/|\\}}undef-debug2.cc:3)
+# CHECK-NEXT: >>> {{.*}}tmp5.o:(ns::var)
+
# RUN: not ld.lld %t.o %t2.a -o /dev/null -no-demangle 2>&1 | \
# RUN: FileCheck -check-prefix=NO-DEMANGLE %s
# NO-DEMANGLE: error: undefined symbol: _Z3fooi
``````````
</details>
https://github.com/llvm/llvm-project/pull/70854
More information about the llvm-commits
mailing list