[lld] r317080 - [ELF] - Teach LLD to report line numbers for data symbols.

George Rimar via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 00:42:38 PDT 2017


Author: grimar
Date: Wed Nov  1 00:42:38 2017
New Revision: 317080

URL: http://llvm.org/viewvc/llvm-project?rev=317080&view=rev
Log:
[ELF] - Teach LLD to report line numbers for data symbols.

This is PR34826.

Currently LLD is unable to report a line number when reporting
a duplicate declaration of a variable.

That happens because, to extract line information, we always use
the .debug_line section content, which describes the mapping from machine
instructions to source file locations. That does not help for
variables, as it does not describe them.

In this patch I am taking the appropriate information about
variable locations from the .debug_info section.

Differential revision: https://reviews.llvm.org/D38721

Added:
    lld/trunk/test/ELF/conflict-debug-variable.s
Modified:
    lld/trunk/ELF/InputFiles.cpp
    lld/trunk/ELF/InputFiles.h
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/InputSection.h
    lld/trunk/ELF/Relocations.cpp
    lld/trunk/ELF/SymbolTable.cpp

Modified: lld/trunk/ELF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.cpp?rev=317080&r1=317079&r2=317080&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.cpp (original)
+++ lld/trunk/ELF/InputFiles.cpp Wed Nov  1 00:42:38 2017
@@ -67,7 +67,7 @@ Optional<MemoryBufferRef> elf::readFile(
   return MBRef;
 }
 
-template <class ELFT> void ObjFile<ELFT>::initializeDwarfLine() {
+template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
   DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(this));
   const DWARFObject &Obj = Dwarf.getDWARFObj();
   DwarfLine.reset(new DWARFDebugLine);
@@ -77,7 +77,68 @@ template <class ELFT> void ObjFile<ELFT>
   // The second parameter is offset in .debug_line section
   // for compilation unit (CU) of interest. We have only one
   // CU (object file), so offset is always 0.
-  DwarfLine->getOrParseLineTable(LineData, 0);
+  const DWARFDebugLine::LineTable *LT =
+      DwarfLine->getOrParseLineTable(LineData, 0);
+
+  // Return if there is no debug information about CU available.
+  if (!Dwarf.getNumCompileUnits())
+    return;
+
+  // Loop over variable records and insert them to VariableLoc.
+  DWARFCompileUnit *CU = Dwarf.getCompileUnitAtIndex(0);
+  for (const auto &Entry : CU->dies()) {
+    DWARFDie Die(CU, &Entry);
+    // Skip all tags that are not variables.
+    if (Die.getTag() != dwarf::DW_TAG_variable)
+      continue;
+
+    // Skip if a local variable because we don't need them for generating error
+    // messages. In general, only non-local symbols can fail to be linked.
+    if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0))
+      continue;
+
+    // Get the source filename index for the variable.
+    unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0);
+    if (!LT->hasFileAtIndex(File))
+      continue;
+
+    // Get the line number on which the variable is declared.
+    unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0);
+
+    // Get the name of the variable and add the collected information to
+    // VariableLoc. Usually Name is non-empty, but it can be empty if the input
+    // object file lacks some debug info.
+    StringRef Name = dwarf::toString(Die.find(dwarf::DW_AT_name), "");
+    if (!Name.empty())
+      VariableLoc.insert({Name, {File, Line}});
+  }
+}
+
+// Returns the pair of file name and line number describing location of data
+// object (variable, array, etc) definition.
+template <class ELFT>
+Optional<std::pair<std::string, unsigned>>
+ObjFile<ELFT>::getVariableLoc(StringRef Name) {
+  llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
+
+  // There is always only one CU so it's offset is 0.
+  const DWARFDebugLine::LineTable *LT = DwarfLine->getLineTable(0);
+  if (!LT)
+    return None;
+
+  // Return if we have no debug information about data object.
+  auto It = VariableLoc.find(Name);
+  if (It == VariableLoc.end())
+    return None;
+
+  // Take file name string from line table.
+  std::string FileName;
+  if (!LT->getFileNameByIndex(
+          It->second.first /* File */, nullptr,
+          DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FileName))
+    return None;
+
+  return std::make_pair(FileName, It->second.second /*Line*/);
 }
 
 // Returns source line information for a given offset
@@ -85,7 +146,7 @@ template <class ELFT> void ObjFile<ELFT>
 template <class ELFT>
 Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S,
                                                   uint64_t Offset) {
-  llvm::call_once(InitDwarfLine, [this]() { initializeDwarfLine(); });
+  llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
 
   // The offset to CU is 0.
   const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0);

Modified: lld/trunk/ELF/InputFiles.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.h?rev=317080&r1=317079&r2=317080&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.h (original)
+++ lld/trunk/ELF/InputFiles.h Wed Nov  1 00:42:38 2017
@@ -185,6 +185,7 @@ public:
   // If no information is available, returns "".
   std::string getLineInfo(InputSectionBase *S, uint64_t Offset);
   llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
+  llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef Name);
 
   // MIPS GP0 value defined by this file. This value represents the gp value
   // used to create the relocatable object and required to support
@@ -200,7 +201,7 @@ private:
   void
   initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
   void initializeSymbols();
-  void initializeDwarfLine();
+  void initializeDwarf();
   InputSectionBase *getRelocTarget(const Elf_Shdr &Sec);
   InputSectionBase *createInputSection(const Elf_Shdr &Sec);
   StringRef getSectionName(const Elf_Shdr &Sec);
@@ -216,6 +217,7 @@ private:
   // single object file, so we cache debugging information in order to
   // parse it only once for each object file we link.
   std::unique_ptr<llvm::DWARFDebugLine> DwarfLine;
+  llvm::DenseMap<StringRef, std::pair<unsigned, unsigned>> VariableLoc;
   llvm::once_flag InitDwarfLine;
 };
 

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=317080&r1=317079&r2=317080&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Wed Nov  1 00:42:38 2017
@@ -261,31 +261,41 @@ std::string InputSectionBase::getLocatio
   return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str();
 }
 
-// Returns a source location string. This function is intended to be
-// used for constructing an error message. The returned message looks
-// like this:
+// Concatenates arguments to construct a string representing an error location.
+static std::string createFileLineMsg(StringRef Path, unsigned Line) {
+  std::string Filename = path::filename(Path);
+  std::string Lineno = ":" + std::to_string(Line);
+  if (Filename == Path)
+    return Filename + Lineno;
+  return Filename + Lineno + " (" + Path.str() + Lineno + ")";
+}
+
+// This function is intended to be used for constructing an error message.
+// The returned message looks like this:
 //
 //   foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42)
 //
-// Returns an empty string if there's no way to get line info.
-template <class ELFT> std::string InputSectionBase::getSrcMsg(uint64_t Offset) {
+//  Returns an empty string if there's no way to get line info.
+template <class ELFT>
+std::string InputSectionBase::getSrcMsg(const SymbolBody &Sym,
+                                        uint64_t Offset) {
   // Synthetic sections don't have input files.
   ObjFile<ELFT> *File = getFile<ELFT>();
   if (!File)
     return "";
 
-  Optional<DILineInfo> Info = File->getDILineInfo(this, Offset);
+  // In DWARF, functions and variables are stored to different places.
+  // First, lookup a function for a given offset.
+  if (Optional<DILineInfo> Info = File->getDILineInfo(this, Offset))
+    return createFileLineMsg(Info->FileName, Info->Line);
+
+  // If it failed, lookup again as a variable.
+  if (Optional<std::pair<std::string, unsigned>> FileLine =
+          File->getVariableLoc(Sym.getName()))
+    return createFileLineMsg(FileLine->first, FileLine->second);
 
   // File->SourceFile contains STT_FILE symbol, and that is a last resort.
-  if (!Info)
-    return File->SourceFile;
-
-  std::string Path = Info->FileName;
-  std::string Filename = path::filename(Path);
-  std::string Lineno = ":" + std::to_string(Info->Line);
-  if (Filename == Path)
-    return Filename + Lineno;
-  return Filename + Lineno + " (" + Path + Lineno + ")";
+  return File->SourceFile;
 }
 
 // Returns a filename string along with an optional section name. This
@@ -1004,10 +1014,14 @@ template std::string InputSectionBase::g
 template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t);
 template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t);
 
-template std::string InputSectionBase::getSrcMsg<ELF32LE>(uint64_t);
-template std::string InputSectionBase::getSrcMsg<ELF32BE>(uint64_t);
-template std::string InputSectionBase::getSrcMsg<ELF64LE>(uint64_t);
-template std::string InputSectionBase::getSrcMsg<ELF64BE>(uint64_t);
+template std::string InputSectionBase::getSrcMsg<ELF32LE>(const SymbolBody &,
+                                                          uint64_t);
+template std::string InputSectionBase::getSrcMsg<ELF32BE>(const SymbolBody &,
+                                                          uint64_t);
+template std::string InputSectionBase::getSrcMsg<ELF64LE>(const SymbolBody &,
+                                                          uint64_t);
+template std::string InputSectionBase::getSrcMsg<ELF64BE>(const SymbolBody &,
+                                                          uint64_t);
 
 template void InputSection::writeTo<ELF32LE>(uint8_t *);
 template void InputSection::writeTo<ELF32BE>(uint8_t *);

Modified: lld/trunk/ELF/InputSection.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=317080&r1=317079&r2=317080&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.h (original)
+++ lld/trunk/ELF/InputSection.h Wed Nov  1 00:42:38 2017
@@ -177,7 +177,7 @@ public:
 
   // Returns a source location string. Used to construct an error message.
   template <class ELFT> std::string getLocation(uint64_t Offset);
-  template <class ELFT> std::string getSrcMsg(uint64_t Offset);
+  template <class ELFT> std::string getSrcMsg(const SymbolBody &Sym, uint64_t Offset);
   std::string getObjMsg(uint64_t Offset);
 
   // Each section knows how to relocate itself. These functions apply

Modified: lld/trunk/ELF/Relocations.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Relocations.cpp?rev=317080&r1=317079&r2=317080&view=diff
==============================================================================
--- lld/trunk/ELF/Relocations.cpp (original)
+++ lld/trunk/ELF/Relocations.cpp Wed Nov  1 00:42:38 2017
@@ -74,7 +74,7 @@ static std::string getLocation(InputSect
                                uint64_t Off) {
   std::string Msg =
       "\n>>> defined in " + toString(Sym.getFile()) + "\n>>> referenced by ";
-  std::string Src = S.getSrcMsg<ELFT>(Off);
+  std::string Src = S.getSrcMsg<ELFT>(Sym, Off);
   if (!Src.empty())
     Msg += Src + "\n>>>               ";
   return Msg + S.getObjMsg(Off);
@@ -728,7 +728,7 @@ static bool maybeReportUndefined(SymbolB
   std::string Msg =
       "undefined symbol: " + toString(Sym) + "\n>>> referenced by ";
 
-  std::string Src = Sec.getSrcMsg<ELFT>(Offset);
+  std::string Src = Sec.getSrcMsg<ELFT>(Sym, Offset);
   if (!Src.empty())
     Msg += Src + "\n>>>               ";
   Msg += Sec.getObjMsg(Offset);

Modified: lld/trunk/ELF/SymbolTable.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.cpp?rev=317080&r1=317079&r2=317080&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.cpp (original)
+++ lld/trunk/ELF/SymbolTable.cpp Wed Nov  1 00:42:38 2017
@@ -456,9 +456,9 @@ static void reportDuplicate(SymbolBody *
   //   >>> defined at baz.c:563
   //   >>>            baz.o in archive libbaz.a
   auto *Sec1 = cast<InputSectionBase>(D->Section);
-  std::string Src1 = Sec1->getSrcMsg<ELFT>(D->Value);
+  std::string Src1 = Sec1->getSrcMsg<ELFT>(*Sym, D->Value);
   std::string Obj1 = Sec1->getObjMsg(D->Value);
-  std::string Src2 = ErrSec->getSrcMsg<ELFT>(ErrOffset);
+  std::string Src2 = ErrSec->getSrcMsg<ELFT>(*Sym, ErrOffset);
   std::string Obj2 = ErrSec->getObjMsg(ErrOffset);
 
   std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at ";

Added: lld/trunk/test/ELF/conflict-debug-variable.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/conflict-debug-variable.s?rev=317080&view=auto
==============================================================================
--- lld/trunk/test/ELF/conflict-debug-variable.s (added)
+++ lld/trunk/test/ELF/conflict-debug-variable.s Wed Nov  1 00:42:38 2017
@@ -0,0 +1,141 @@
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: llvm-dwarfdump %t.o | FileCheck -check-prefix=INPUT %s
+# RUN: not ld.lld %t.o %t.o -o %t 2>&1 | FileCheck %s
+
+# INPUT:     .debug_info contents:
+# INPUT:       DW_TAG_variable
+# INPUT-NEXT:    DW_AT_name      ("foo")
+# INPUT-NEXT:    DW_AT_decl_file ("1.c")
+# INPUT-NEXT:    DW_AT_decl_line (1)
+# INPUT-NEXT:    DW_AT_type      (cu + 0x0032 "int")
+# INPUT-NEXT:    DW_AT_external  (true)
+# INPUT-NEXT:    DW_AT_location  (DW_OP_addr 0x0)
+# INPUT:       DW_TAG_variable
+# INPUT-NEXT:    DW_AT_name      ("bar")
+# INPUT-NEXT:    DW_AT_decl_file ("1.c")
+# INPUT-NEXT:    DW_AT_decl_line (2)
+# INPUT-NEXT:    DW_AT_type      (cu + 0x0032 "int")
+# INPUT-NEXT:    DW_AT_external  (true)
+# INPUT-NEXT:    DW_AT_location  (DW_OP_addr 0x0)
+
+## Check we use information from .debug_info in messages.
+# CHECK:      duplicate symbol: bar
+# CHECK-NEXT: >>> defined at 1.c:2
+# CHECK-NEXT: >>>            {{.*}}:(bar)
+# CHECK-NEXT: >>> defined at 1.c:2
+# CHECK-NEXT: >>>            {{.*}}:(.data+0x0)
+# CHECK:      duplicate symbol: foo
+# CHECK-NEXT: >>> defined at 1.c:1
+# CHECK-NEXT: >>>            {{.*}}:(foo)
+# CHECK-NEXT: >>> defined at 1.c:1
+# CHECK-NEXT: >>>            {{.*}}:(.bss+0x0)
+
+# Used reduced output from following code and gcc 7.1.0
+# to produce this input file:
+# Source (1.c):
+#  int foo = 0;
+#  int bar = 1;
+# Invocation: g++ -g -S 1.c
+
+.bss
+.globl  foo
+.type  foo, @object
+.size  foo, 4
+foo:
+
+.data
+.globl  bar
+.type  bar, @object
+.size  bar, 4
+bar:
+
+.text
+.file 1 "1.c"
+
+.section  .debug_info,"",@progbits
+  .long  0x4b            # Compile Unit: length = 0x0000004b)
+  .value  0x4            # version = 0x0004
+  .long  0               # abbr_offset = 0x0
+  .byte  0x8             # addr_size = 0x08
+
+  .uleb128 0x1           # DW_TAG_compile_unit [1] *
+  .long  0               # DW_AT_producer [DW_FORM_strp]  ( .debug_str[0x00000000] = )
+  .byte  0x4             # DW_AT_language [DW_FORM_data1]  (DW_LANG_C_plus_plus)
+  .string  "1.c"         # DW_AT_name [DW_FORM_string]  ("1.c")
+  .long  0               # DW_AT_comp_dir [DW_FORM_strp]  ( .debug_str[0x00000000] = )
+  .long  0               # DW_AT_stmt_list [DW_FORM_sec_offset]  (0x00000000)
+                         
+  .uleb128 0x2           # DW_TAG_variable [2]
+  .string  "foo"         # DW_AT_name [DW_FORM_string]  ("foo")
+  .byte  0x1             # DW_AT_decl_file [DW_FORM_data1]  ("1.c")
+  .byte  0x1             # DW_AT_decl_line [DW_FORM_data1]  (1)
+  .long  0x32            # DW_AT_type [DW_FORM_ref4]  (cu + 0x0032 => {0x00000032})
+  .uleb128 0x9           # DW_AT_external [DW_FORM_flag_present]  (true)
+  .byte  0x3             
+  .quad  foo             # DW_AT_location [DW_FORM_exprloc]  (DW_OP_addr 0x0)
+                         
+  .uleb128 0x3           # DW_TAG_base_type [3]
+  .byte  0x4             # DW_AT_byte_size [DW_FORM_data1]  (0x04)
+  .byte  0x5             # DW_AT_encoding [DW_FORM_data1]  (DW_ATE_signed)
+  .string  "int"         # DW_AT_name [DW_FORM_string]  ("int")
+                         
+  .uleb128 0x2           # DW_TAG_variable [2]
+  .string  "bar"         # DW_AT_name [DW_FORM_string]  ("bar")
+  .byte  0x1             # DW_AT_decl_file [DW_FORM_data1]  ("1.c")
+  .byte  0x2             # DW_AT_decl_line [DW_FORM_data1]  (2)
+  .long  0x32            # DW_AT_type [DW_FORM_ref4]  (cu + 0x0032 => {0x00000032})
+  .uleb128 0x9           # DW_AT_external [DW_FORM_flag_present]  (true)
+  .byte  0x3             
+  .quad  bar             # DW_AT_location [DW_FORM_exprloc]  (DW_OP_addr 0x0)
+  .byte  0               # END
+  
+
+.section  .debug_abbrev,"",@progbits
+  .uleb128 0x1   # Abbreviation code.
+  .uleb128 0x11  # DW_TAG_compile_unit
+  
+  .byte  0x1     # ID
+  .uleb128 0x25  # DW_AT_producer, DW_FORM_strp
+  .uleb128 0xe
+  .uleb128 0x13  # DW_AT_language, DW_FORM_data1
+  .uleb128 0xb
+  .uleb128 0x3   # DW_AT_name, DW_FORM_string
+  .uleb128 0x8
+  .uleb128 0x1b  # DW_AT_comp_dir, DW_FORM_strp
+  .uleb128 0xe
+  .uleb128 0x10  # DW_AT_stmt_list, DW_FORM_sec_offset
+  .uleb128 0x17
+  .byte  0
+  .byte  0
+  
+  .uleb128 0x2  # ID
+  .uleb128 0x34 # DW_TAG_variable, DW_CHILDREN_no
+  .byte  0
+  .uleb128 0x3  # DW_AT_name, DW_FORM_string
+  .uleb128 0x8
+  .uleb128 0x3a # DW_AT_decl_file, DW_FORM_data1
+  .uleb128 0xb
+  .uleb128 0x3b # DW_AT_decl_line, DW_FORM_data1
+  .uleb128 0xb
+  .uleb128 0x49 # DW_AT_type, DW_FORM_ref4
+  .uleb128 0x13
+  .uleb128 0x3f # DW_AT_external, DW_FORM_flag_present
+  .uleb128 0x19
+  .uleb128 0x2  # DW_AT_location, DW_FORM_exprloc
+  .uleb128 0x18
+  .byte  0
+  .byte  0
+  
+  .uleb128 0x3  # ID
+  .uleb128 0x24 # DW_TAG_base_type, DW_CHILDREN_no
+  .byte  0
+  .uleb128 0xb  # DW_AT_byte_size, DW_FORM_data1
+  .uleb128 0xb
+  .uleb128 0x3e # DW_AT_encoding, DW_FORM_data1
+  .uleb128 0xb
+  .uleb128 0x3  # DW_AT_name, DW_FORM_string
+  .uleb128 0x8
+  .byte  0
+  .byte  0
+  .byte  0
+




More information about the llvm-commits mailing list