[lld] [ELF] Improve undefined symbol message w/ DW_TAG_variable but w/o line number information (PR #70854)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 31 13:34:04 PDT 2023


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/70854

>From 4ddc9f986890faff5630295a68db2c1160d48463 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Tue, 31 Oct 2023 12:49:26 -0700
Subject: [PATCH] [ELF] Improve undefined symbol message w/ DW_TAG_variable but
 w/o line number information

The undefined symbol message suggests the source line when line number
information is available (see https://reviews.llvm.org/D31481).
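When the undefined symbol is referenced from a global variable (data, as
opposed to code inside a function), line number information is unavailable
for the reference, so the message cannot include the source line.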
```
extern int undef;
namespace ns {
// DW_TAG_variable(DW_AT_decl_file/DW_AT_decl_line) is available while
// line number information is unavailable.
int *var[] = {
  &undef
};
}

ld.lld: error: undefined symbol: undef
>>> referenced by undef-debug2.cc
>>>               undef-debug2.o:(ns::var)
```

This patch generalizes `getEnclosingFunction` into `getEnclosingSymbol` and
uses the enclosing symbol (here the variable `ns::var`) to obtain this
information.
```
ld.lld: error: undefined symbol: undef
>>> referenced by undef-debug2.cc:3 (/tmp/c/undef-debug2.cc:3)
>>>               undef-debug2.o:(ns::var)
```
---
 lld/ELF/InputSection.cpp           |  14 +-
 lld/ELF/InputSection.h             |   9 +-
 lld/ELF/Relocations.cpp            |   3 +-
 lld/test/ELF/Inputs/undef-debug.s  |   1 +
 lld/test/ELF/Inputs/undef-debug2.s | 200 +++++++++++++++++++++++++++++
 lld/test/ELF/undef.s               |   9 +-
 6 files changed, 222 insertions(+), 14 deletions(-)
 create mode 100644 lld/test/ELF/Inputs/undef-debug2.s

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 02394cbae95d557..e4ce050a789dfbb 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -241,12 +241,12 @@ InputSection *InputSectionBase::getLinkOrderDep() const {
   return cast<InputSection>(file->getSections()[link]);
 }
 
-// Find a function symbol that encloses a given location.
-Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) {
+// Find a symbol that encloses a given location.
+Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) {
   for (Symbol *b : file->getSymbols())
     if (Defined *d = dyn_cast<Defined>(b))
-      if (d->section == this && d->type == STT_FUNC && d->value <= offset &&
-          offset < d->value + d->size)
+      if (d->section == this && d->value <= offset &&
+          offset < d->value + d->size && (type == 0 || type == d->type))
         return d;
   return nullptr;
 }
@@ -296,10 +296,8 @@ std::string InputSectionBase::getObjMsg(uint64_t off) {
   // Find a symbol that encloses a given location. getObjMsg may be called
   // before ObjFile::initSectionsAndLocalSyms where local symbols are
   // initialized.
-  for (Symbol *b : file->getSymbols())
-    if (auto *d = dyn_cast_or_null<Defined>(b))
-      if (d->section == this && d->value <= off && off < d->value + d->size)
-        return filename + ":(" + toString(*d) + ")" + archive;
+  if (Defined *d = getEnclosingSymbol(off))
+    return filename + ":(" + toString(*d) + ")" + archive;
 
   // If there's no symbol, print out the offset in the section.
   return (filename + ":(" + name + "+0x" + utohexstr(off) + ")" + archive)
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 2b91711abba3d14..7570901b4ef9425 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -189,9 +189,12 @@ class InputSectionBase : public SectionBase {
 
   InputSection *getLinkOrderDep() const;
 
-  // Get the function symbol that encloses this offset from within the
-  // section.
-  Defined *getEnclosingFunction(uint64_t offset);
+  // Get a symbol that encloses this offset from within the section. If type is
+  // not zero, return a symbol with the specified type.
+  Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0);
+  Defined *getEnclosingFunction(uint64_t offset) {
+    return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC);
+  }
 
   // Returns a source location string. Used to construct an error message.
   std::string getLocation(uint64_t offset);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index f3fb0c71a8b3064..62e80521c4558ee 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -739,7 +739,8 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef,
     uint64_t offset = l.offset;
 
     msg += "\n>>> referenced by ";
-    std::string src = sec.getSrcMsg(sym, offset);
+    Symbol *enclosing = sec.getEnclosingSymbol(offset);
+    std::string src = sec.getSrcMsg(enclosing ? *enclosing : sym, offset);
     if (!src.empty())
       msg += src + "\n>>>               ";
     msg += sec.getObjMsg(offset);
diff --git a/lld/test/ELF/Inputs/undef-debug.s b/lld/test/ELF/Inputs/undef-debug.s
index 46c1c92d2b1f6e8..016a0cac60e5eaf 100644
--- a/lld/test/ELF/Inputs/undef-debug.s
+++ b/lld/test/ELF/Inputs/undef-debug.s
@@ -1,3 +1,4 @@
+## Variables with line number information
 .file 1 "dir/undef-debug.s"
 .loc 1 3
         .quad zed3
diff --git a/lld/test/ELF/Inputs/undef-debug2.s b/lld/test/ELF/Inputs/undef-debug2.s
new file mode 100644
index 000000000000000..e176969dc4e3ffc
--- /dev/null
+++ b/lld/test/ELF/Inputs/undef-debug2.s
@@ -0,0 +1,200 @@
+## Generated from:
+##
+## extern int zed9;
+## namespace ns {
+## int *var[] = {
+##   &zed9
+## };
+## }
+	.text
+	.file	"undef-debug2.cc"
+	.file	0 "dir" "undef-debug2.cc" md5 0xd7caefb836c47f6c56303f19e96f2587
+	.type	_ZN2ns3varE,@object             # @_ZN2ns3varE
+	.data
+	.globl	_ZN2ns3varE
+	.p2align	3, 0x0
+_ZN2ns3varE:
+	.quad	zed9
+	.size	_ZN2ns3varE, 8
+
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	37                              # DW_FORM_strx1
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	37                              # DW_FORM_strx1
+	.byte	115                             # DW_AT_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	57                              # DW_TAG_namespace
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	52                              # DW_TAG_variable
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	110                             # DW_AT_linkage_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	1                               # DW_TAG_array_type
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	5                               # Abbreviation Code
+	.byte	33                              # DW_TAG_subrange_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	55                              # DW_AT_count
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	6                               # Abbreviation Code
+	.byte	15                              # DW_TAG_pointer_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	7                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	8                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	5                               # DWARF version number
+	.byte	1                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	1                               # Abbrev [1] 0xc:0x3b DW_TAG_compile_unit
+	.byte	0                               # DW_AT_producer
+	.short	33                              # DW_AT_language
+	.byte	1                               # DW_AT_name
+	.long	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.byte	2                               # DW_AT_comp_dir
+	.long	.Laddr_table_base0              # DW_AT_addr_base
+	.byte	2                               # Abbrev [2] 0x1e:0xf DW_TAG_namespace
+	.byte	3                               # DW_AT_name
+	.byte	3                               # Abbrev [3] 0x20:0xc DW_TAG_variable
+	.byte	4                               # DW_AT_name
+	.long	45                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	0                               # DW_AT_decl_file
+	.byte	3                               # DW_AT_decl_line
+	.byte	2                               # DW_AT_location
+	.byte	161
+	.byte	0
+	.byte	7                               # DW_AT_linkage_name
+	.byte	0                               # End Of Children Mark
+	.byte	4                               # Abbrev [4] 0x2d:0xc DW_TAG_array_type
+	.long	57                              # DW_AT_type
+	.byte	5                               # Abbrev [5] 0x32:0x6 DW_TAG_subrange_type
+	.long	66                              # DW_AT_type
+	.byte	1                               # DW_AT_count
+	.byte	0                               # End Of Children Mark
+	.byte	6                               # Abbrev [6] 0x39:0x5 DW_TAG_pointer_type
+	.long	62                              # DW_AT_type
+	.byte	7                               # Abbrev [7] 0x3e:0x4 DW_TAG_base_type
+	.byte	5                               # DW_AT_name
+	.byte	5                               # DW_AT_encoding
+	.byte	4                               # DW_AT_byte_size
+	.byte	8                               # Abbrev [8] 0x42:0x4 DW_TAG_base_type
+	.byte	6                               # DW_AT_name
+	.byte	8                               # DW_AT_byte_size
+	.byte	7                               # DW_AT_encoding
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str_offsets,"",@progbits
+	.long	36                              # Length of String Offsets Set
+	.short	5
+	.short	0
+.Lstr_offsets_base0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 18.0.0"          # string offset=0
+.Linfo_string1:
+	.asciz	"undef-debug2.cc"               # string offset=21
+.Linfo_string2:
+	.asciz	"dir"                           # string offset=37
+.Linfo_string3:
+	.asciz	"ns"                            # string offset=44
+.Linfo_string4:
+	.asciz	"var"                           # string offset=47
+.Linfo_string5:
+	.asciz	"int"                           # string offset=51
+.Linfo_string6:
+	.asciz	"__ARRAY_SIZE_TYPE__"           # string offset=55
+.Linfo_string7:
+	.asciz	"_ZN2ns3varE"                   # string offset=75
+	.section	.debug_str_offsets,"",@progbits
+	.long	.Linfo_string0
+	.long	.Linfo_string1
+	.long	.Linfo_string2
+	.long	.Linfo_string3
+	.long	.Linfo_string4
+	.long	.Linfo_string5
+	.long	.Linfo_string6
+	.long	.Linfo_string7
+	.section	.debug_addr,"",@progbits
+	.long	.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+	.short	5                               # DWARF version number
+	.byte	8                               # Address size
+	.byte	0                               # Segment selector size
+.Laddr_table_base0:
+	.quad	_ZN2ns3varE
+.Ldebug_addr_end0:
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s
index 2b42ae12be2c129..009b8bf6d23b410 100644
--- a/lld/test/ELF/undef.s
+++ b/lld/test/ELF/undef.s
@@ -3,11 +3,12 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-debug.s -o %t3.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-debug2.s -o %t5.o
 # RUN: rm -f %t2.a
 # RUN: llvm-ar rc %t2.a %t2.o
-# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o %t5.o -o /dev/null 2>&1 \
 # RUN:   | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
-# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o %t5.o -o /dev/null 2>&1 \
 # RUN:   | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
 
 # CHECK:      error: undefined symbol: foo
@@ -82,6 +83,10 @@
 # CHECK-NEXT: >>> referenced by undef-bad-debug2.s:11 (dir2{{/|\\}}undef-bad-debug2.s:11)
 # CHECK-NEXT: >>>               {{.*}}tmp4.o:(.text+0x18)
 
+# CHECK:      error: undefined symbol: zed9
+# CHECK-NEXT: >>> referenced by undef-debug2.cc:3 (dir{{/|\\}}undef-debug2.cc:3)
+# CHECK-NEXT: >>>               {{.*}}tmp5.o:(ns::var)
+
 # RUN: not ld.lld %t.o %t2.a -o /dev/null -no-demangle 2>&1 | \
 # RUN:   FileCheck -check-prefix=NO-DEMANGLE %s
 # NO-DEMANGLE: error: undefined symbol: _Z3fooi



More information about the llvm-commits mailing list