[llvm] 04a2e12 - DebugInfo/Symbolize: Retrieve filename from the preceding STT_FILE for .symtab symbolization
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 10 09:47:18 PST 2021
Author: Fangrui Song
Date: 2021-02-10T09:47:10-08:00
New Revision: 04a2e12612f49e65a7dc14fa45f11f0632d427f2
URL: https://github.com/llvm/llvm-project/commit/04a2e12612f49e65a7dc14fa45f11f0632d427f2
DIFF: https://github.com/llvm/llvm-project/commit/04a2e12612f49e65a7dc14fa45f11f0632d427f2.diff
LOG: DebugInfo/Symbolize: Retrieve filename from the preceding STT_FILE for .symtab symbolization
The ELF spec says:
> STT_FILE: Conventionally, the symbol's name gives the name of the source file associated with the object file. A file symbol has STB_LOCAL binding, its section index is SHN_ABS, and it precedes the other STB_LOCAL symbols for the file, if it is present.
For a local symbol, the preceding STT_FILE symbol is almost always in the same
file[1]. GNU addr2line uses this heuristic to retrieve the filename associated
with a local symbol (e.g. internal linkage functions in C/C++).
GNU addr2line can assign STT_FILE filename to a non-local symbol, too, but the trick
only works if no regular symbol precede STT_FILE. This patch does not implement this corner case
(not useful for most executables which have more than one files).
In case of filename mismatch between .debug_line & .symtab, arbitrarily make .debug_line win.
[1]: LLD does not synthesize STT_FILE symbols
(https://bugs.llvm.org/show_bug.cgi?id=48023 see also
https://sourceware.org/bugzilla/show_bug.cgi?id=26822). An assembly file
without `.file` directives can cause mis-attribution. This is an edge case.
Differential Revision: https://reviews.llvm.org/D95927
Added:
llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s
llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml
Modified:
llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s
llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s
llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s
Removed:
################################################################################
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index b260cd9da485..2c6a96a973d1 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -16,6 +16,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/Casting.h"
@@ -69,9 +70,8 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
return std::move(E);
}
- std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
- &Os = res->Objects;
- auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
+ std::vector<SymbolDesc> &Fs = res->Functions, &Os = res->Objects;
+ auto Uniquify = [](std::vector<SymbolDesc> &S) {
// Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
// pick the one with the largest Size. This helps us avoid symbols with no
// size information (Size=0).
@@ -79,7 +79,7 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
auto I = S.begin(), E = S.end(), J = S.begin();
while (I != E) {
auto OI = I;
- while (++I != E && OI->first.Addr == I->first.Addr) {
+ while (++I != E && OI->Addr == I->Addr) {
}
*J++ = I[-1];
}
@@ -138,8 +138,7 @@ Error SymbolizableObjectFile::addCoffExportSymbols(
uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
uint64_t SymbolStart = ImageBase + Export.Offset;
uint64_t SymbolSize = NextOffset - Export.Offset;
- SymbolDesc SD = {SymbolStart, SymbolSize};
- Functions.emplace_back(SD, Export.Name);
+ Functions.push_back({SymbolStart, SymbolSize, Export.Name, 0});
}
return Error::success();
}
@@ -150,9 +149,23 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
uint64_t OpdAddress) {
// Avoid adding symbols from an unknown/undefined section.
const ObjectFile &Obj = *Symbol.getObject();
+ Expected<StringRef> SymbolNameOrErr = Symbol.getName();
+ if (!SymbolNameOrErr)
+ return SymbolNameOrErr.takeError();
+ StringRef SymbolName = *SymbolNameOrErr;
+
+ uint32_t ELFSymIdx =
+ Obj.isELF() ? ELFSymbolRef(Symbol).getRawDataRefImpl().d.b : 0;
Expected<section_iterator> Sec = Symbol.getSection();
- if (!Sec || Obj.section_end() == *Sec)
+ if (!Sec || Obj.section_end() == *Sec) {
+ if (Obj.isELF()) {
+ // Store the (index, filename) pair for a file symbol.
+ ELFSymbolRef ESym(Symbol);
+ if (ESym.getELFType() == ELF::STT_FILE)
+ FileSymbols.emplace_back(ELFSymIdx, SymbolName);
+ }
return Error::success();
+ }
Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
if (!SymbolTypeOrErr)
@@ -190,24 +203,21 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
}
- Expected<StringRef> SymbolNameOrErr = Symbol.getName();
- if (!SymbolNameOrErr)
- return SymbolNameOrErr.takeError();
- StringRef SymbolName = *SymbolNameOrErr;
// Mach-O symbol table names have leading underscore, skip it.
if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
SymbolName = SymbolName.drop_front();
- SymbolDesc SD = {SymbolAddress, SymbolSize};
-
+ if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL)
+ ELFSymIdx = 0;
+ SymbolDesc SD = {SymbolAddress, SymbolSize, SymbolName, ELFSymIdx};
// DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an
// optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and
// STT_GNU_IFUNC) as function symbols which can be used to symbolize
// addresses.
if (SymbolType == SymbolRef::ST_Data)
- Objects.emplace_back(SD, SymbolName);
+ Objects.push_back(SD);
else
- Functions.emplace_back(SD, SymbolName);
+ Functions.push_back(SD);
return Error::success();
}
@@ -223,23 +233,33 @@ uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
return 0;
}
-bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
- uint64_t Address,
- std::string &Name,
- uint64_t &Addr,
- uint64_t &Size) const {
+bool SymbolizableObjectFile::getNameFromSymbolTable(
+ SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr,
+ uint64_t &Size, std::string &FileName) const {
const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
- std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
+ SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0};
auto SymbolIterator = llvm::upper_bound(Symbols, SD);
if (SymbolIterator == Symbols.begin())
return false;
--SymbolIterator;
- if (SymbolIterator->first.Size != 0 &&
- SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
+ if (SymbolIterator->Size != 0 &&
+ SymbolIterator->Addr + SymbolIterator->Size <= Address)
return false;
- Name = SymbolIterator->second.str();
- Addr = SymbolIterator->first.Addr;
- Size = SymbolIterator->first.Size;
+ Name = SymbolIterator->Name.str();
+ Addr = SymbolIterator->Addr;
+ Size = SymbolIterator->Size;
+
+ if (SymbolIterator->ELFLocalSymIdx != 0) {
+ // If this is an ELF local symbol, find the STT_FILE symbol preceding
+ // SymbolIterator to get the filename. The ELF spec requires the STT_FILE
+ // symbol (if present) precedes the other STB_LOCAL symbols for the file.
+ assert(Module->isELF());
+ auto It = llvm::upper_bound(
+ FileSymbols,
+ std::make_pair(SymbolIterator->ELFLocalSymIdx, StringRef()));
+ if (It != FileSymbols.begin())
+ FileName = It[-1].second.str();
+ }
return true;
}
@@ -265,11 +285,13 @@ SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
// Override function name from symbol table if necessary.
if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
- std::string FunctionName;
+ std::string FunctionName, FileName;
uint64_t Start, Size;
if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
- FunctionName, Start, Size)) {
+ FunctionName, Start, Size, FileName)) {
LineInfo.FunctionName = FunctionName;
+ if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty())
+ LineInfo.FileName = FileName;
}
}
return LineInfo;
@@ -290,12 +312,15 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
// Override the function name in lower frame with name from symbol table.
if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
- std::string FunctionName;
+ std::string FunctionName, FileName;
uint64_t Start, Size;
if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
- FunctionName, Start, Size)) {
- InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
- ->FunctionName = FunctionName;
+ FunctionName, Start, Size, FileName)) {
+ DILineInfo *LI = InlinedContext.getMutableFrame(
+ InlinedContext.getNumberOfFrames() - 1);
+ LI->FunctionName = FunctionName;
+ if (LI->FileName == DILineInfo::BadString && !FileName.empty())
+ LI->FileName = FileName;
}
}
@@ -305,8 +330,9 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
DIGlobal SymbolizableObjectFile::symbolizeData(
object::SectionedAddress ModuleOffset) const {
DIGlobal Res;
+ std::string FileName;
getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
- Res.Start, Res.Size);
+ Res.Start, Res.Size, FileName);
return Res;
}
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index be3c66df056f..616096cb9db0 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -56,8 +56,8 @@ class SymbolizableObjectFile : public SymbolizableModule {
bool UseSymbolTable) const;
bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address,
- std::string &Name, uint64_t &Addr,
- uint64_t &Size) const;
+ std::string &Name, uint64_t &Addr, uint64_t &Size,
+ std::string &FileName) const;
// For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
// (function descriptor) section and OpdExtractor refers to its contents.
Error addSymbol(const object::SymbolRef &Symbol, uint64_t SymbolSize,
@@ -78,12 +78,19 @@ class SymbolizableObjectFile : public SymbolizableModule {
// the following symbol.
uint64_t Size;
+ StringRef Name;
+ // Non-zero if this is an ELF local symbol. See the comment in
+ // getNameFromSymbolTable.
+ uint32_t ELFLocalSymIdx;
+
bool operator<(const SymbolDesc &RHS) const {
return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size;
}
};
- std::vector<std::pair<SymbolDesc, StringRef>> Functions;
- std::vector<std::pair<SymbolDesc, StringRef>> Objects;
+ std::vector<SymbolDesc> Functions;
+ std::vector<SymbolDesc> Objects;
+ // (index, filename) pairs of ELF STT_FILE symbols.
+ std::vector<std::pair<uint32_t, StringRef>> FileSymbols;
SymbolizableObjectFile(const object::ObjectFile *Obj,
std::unique_ptr<DIContext> DICtx,
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s
new file mode 100644
index 000000000000..280175dcf753
--- /dev/null
+++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s
@@ -0,0 +1,14 @@
+# REQUIRES: x86-registered-target
+## If the filename from the preceding STT_FILE does not match .debug_line,
+## STT_FILE wins. Compilers should not emit such bogus information.
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t
+# RUN: llvm-symbolizer --obj=%t 0 | FileCheck %s
+
+# CHECK: foo
+# CHECK-NEXT: 1.c:0:0
+
+.file "1.c"
+.file 0 "/tmp" "0.c"
+foo:
+ .loc 0 1 0
+ nop
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s
index 04761635828b..64679a952197 100644
--- a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s
+++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s
@@ -4,15 +4,14 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t
# RUN: llvm-symbolizer --obj=%t 0 1 2 | FileCheck %s
-## TODO Find the preceding STT_FILE symbol as the filename of a local symbol.
# CHECK: local1
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: 1.c:0:0
# CHECK-EMPTY:
# CHECK-NEXT: local2
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: 2.c:0:0
# CHECK-EMPTY:
# CHECK-NEXT: local3
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: 3.c:0:0
# CHECK-EMPTY:
.file "1.c"
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml
new file mode 100644
index 000000000000..2d68eaa1e5ba
--- /dev/null
+++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml
@@ -0,0 +1,75 @@
+# RUN: yaml2obj --docnum=1 %s -o %t1
+# RUN: llvm-symbolizer --obj=%t1 0 1 2 | FileCheck %s
+
+## The local symbol has no preceding STT_FILE. Its filename is unavailable.
+# CHECK: local
+# CHECK-NEXT: ??:0:0
+# CHECK-EMPTY:
+
+## All local symbols precede all non-local symbols. When there are multiple
+## STT_FILE symbols, we cannot tell which file defines the non-local symbol in
+## question. We could tell if there is only one STT_FILE but in reality there
+## are always more than one file, so implementing the special case is not useful.
+# CHECK-NEXT: global
+# CHECK-NEXT: ??:0:0
+# CHECK-EMPTY:
+# CHECK-NEXT: weak
+# CHECK-NEXT: ??:0:0
+# CHECK-EMPTY:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Size: 3
+Symbols:
+ - Name: local
+ Section: .text
+ Value: 0
+ - Name: 1.c
+ Type: STT_FILE
+ Index: SHN_ABS
+ - Name: global
+ Binding: STB_GLOBAL
+ Section: .text
+ Value: 1
+ - Name: weak
+ Binding: STB_WEAK
+ Section: .text
+ Value: 2
+
+## If st_name of the STT_FILE symbols is invalid, the symbol name is lost as well.
+## TODO Keep the symbol name.
+# RUN: yaml2obj --docnum=2 %s -o %t2
+# RUN: llvm-symbolizer --obj=%t2 0 0 2>&1 | FileCheck %s --check-prefix=CHECK2
+
+# CHECK2: error reading file: st_name (0xffff) is past the end of the string table of size
+# CHECK2-NEXT: ??
+# CHECK2-NEXT: ??:0:0
+# CHECK2-EMPTY:
+# CHECK2-NEXT: ??
+# CHECK2-NEXT: ??:0:0
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Size: 1
+Symbols:
+ - StName: 0xffff
+ Type: STT_FILE
+ Index: SHN_ABS
+ - Name: local
+ Section: .text
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s
index 37ece2b390a1..dc9ac2857dcd 100644
--- a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s
+++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s
@@ -4,7 +4,7 @@
# RUN: llvm-symbolizer --obj=%t 0 | FileCheck %s
# CHECK: b
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: 1.c:0:0
# CHECK-EMPTY:
.file "1.c"
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s
index ff6727560eff..d3f1d3757083 100644
--- a/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s
+++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s
@@ -20,21 +20,20 @@
# CHECK-EMPTY:
# CHECK-NEXT: l_notype
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: symtab-notype.s:0:0
# CHECK-EMPTY:
## TODO addr2line does not symbolize the last two out-of-bounds addresses.
# CHECK-NEXT: l_notype_nosize
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: symtab-notype.s:0:0
# CHECK-EMPTY:
# CHECK-NEXT: l_notype_nosize
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: symtab-notype.s:0:0
# CHECK-EMPTY:
# CHECK-NEXT: l_notype_nosize
-# CHECK-NEXT: ??:0:0
+# CHECK-NEXT: symtab-notype.s:0:0
# CHECK-EMPTY:
-## TODO Find the preceding STT_FILE symbol as the filename of a local symbol.
.file "symtab-notype.s"
.globl _start, g_notype
More information about the llvm-commits
mailing list