[llvm] 92ee3dd - DebugInfo/Symbolize: Don't differentiate function/data symbolization
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 11 19:22:52 PST 2021
Author: Fangrui Song
Date: 2021-02-11T19:22:44-08:00
New Revision: 92ee3dd95dc666df7c37ccdf3037ced921a519e6
URL: https://github.com/llvm/llvm-project/commit/92ee3dd95dc666df7c37ccdf3037ced921a519e6
DIFF: https://github.com/llvm/llvm-project/commit/92ee3dd95dc666df7c37ccdf3037ced921a519e6.diff
LOG: DebugInfo/Symbolize: Don't differentiate function/data symbolization
Before d08bd13ac8a560c4645e17e192ca07e1bdcd2895, only `SymbolRef::ST_Function`
symbols were used for .symtab symbolization. That commit added a `"DATA"` mode
to llvm-symbolizer which used `SymbolRef::ST_Data` symbols for symbolization.
Since function and data symbols have different addresses, we don't need to
differentiate the two modes. This patch unifies the two modes to simplify
code.
`"DATA"` is used by `compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp`.
`check-hwasan` and `check-tsan` have runtime tests.
Differential Revision: https://reviews.llvm.org/D96322
Added:
llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml
Modified:
llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
Removed:
################################################################################
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 2c6a96a973d1..52342b27cf20 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -70,23 +70,19 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
return std::move(E);
}
- std::vector<SymbolDesc> &Fs = res->Functions, &Os = res->Objects;
- auto Uniquify = [](std::vector<SymbolDesc> &S) {
- // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
- // pick the one with the largest Size. This helps us avoid symbols with no
- // size information (Size=0).
- llvm::sort(S);
- auto I = S.begin(), E = S.end(), J = S.begin();
- while (I != E) {
- auto OI = I;
- while (++I != E && OI->Addr == I->Addr) {
- }
- *J++ = I[-1];
+ std::vector<SymbolDesc> &SS = res->Symbols;
+ // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
+ // pick the one with the largest Size. This helps us avoid symbols with no
+ // size information (Size=0).
+ llvm::sort(SS);
+ auto I = SS.begin(), E = SS.end(), J = SS.begin();
+ while (I != E) {
+ auto OI = I;
+ while (++I != E && OI->Addr == I->Addr) {
}
- S.erase(J, S.end());
- };
- Uniquify(Fs);
- Uniquify(Os);
+ *J++ = I[-1];
+ }
+ SS.erase(J, SS.end());
return std::move(res);
}
@@ -138,7 +134,7 @@ Error SymbolizableObjectFile::addCoffExportSymbols(
uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
uint64_t SymbolStart = ImageBase + Export.Offset;
uint64_t SymbolSize = NextOffset - Export.Offset;
- Functions.push_back({SymbolStart, SymbolSize, Export.Name, 0});
+ Symbols.push_back({SymbolStart, SymbolSize, Export.Name, 0});
}
return Error::success();
}
@@ -209,15 +205,7 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL)
ELFSymIdx = 0;
- SymbolDesc SD = {SymbolAddress, SymbolSize, SymbolName, ELFSymIdx};
- // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an
- // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and
- // STT_GNU_IFUNC) as function symbols which can be used to symbolize
- // addresses.
- if (SymbolType == SymbolRef::ST_Data)
- Objects.push_back(SD);
- else
- Functions.push_back(SD);
+ Symbols.push_back({SymbolAddress, SymbolSize, SymbolName, ELFSymIdx});
return Error::success();
}
@@ -234,9 +222,8 @@ uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
}
bool SymbolizableObjectFile::getNameFromSymbolTable(
- SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr,
- uint64_t &Size, std::string &FileName) const {
- const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
+ uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size,
+ std::string &FileName) const {
SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0};
auto SymbolIterator = llvm::upper_bound(Symbols, SD);
if (SymbolIterator == Symbols.begin())
@@ -287,8 +274,8 @@ SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
std::string FunctionName, FileName;
uint64_t Start, Size;
- if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
- FunctionName, Start, Size, FileName)) {
+ if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, Size,
+ FileName)) {
LineInfo.FunctionName = FunctionName;
if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty())
LineInfo.FileName = FileName;
@@ -314,8 +301,8 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
std::string FunctionName, FileName;
uint64_t Start, Size;
- if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
- FunctionName, Start, Size, FileName)) {
+ if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, Size,
+ FileName)) {
DILineInfo *LI = InlinedContext.getMutableFrame(
InlinedContext.getNumberOfFrames() - 1);
LI->FunctionName = FunctionName;
@@ -331,8 +318,8 @@ DIGlobal SymbolizableObjectFile::symbolizeData(
object::SectionedAddress ModuleOffset) const {
DIGlobal Res;
std::string FileName;
- getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
- Res.Start, Res.Size, FileName);
+ getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size,
+ FileName);
return Res;
}
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 616096cb9db0..8fb003fff0ae 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -55,8 +55,8 @@ class SymbolizableObjectFile : public SymbolizableModule {
bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind,
bool UseSymbolTable) const;
- bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address,
- std::string &Name, uint64_t &Addr, uint64_t &Size,
+ bool getNameFromSymbolTable(uint64_t Address, std::string &Name,
+ uint64_t &Addr, uint64_t &Size,
std::string &FileName) const;
// For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
// (function descriptor) section and OpdExtractor refers to its contents.
@@ -87,8 +87,7 @@ class SymbolizableObjectFile : public SymbolizableModule {
return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size;
}
};
- std::vector<SymbolDesc> Functions;
- std::vector<SymbolDesc> Objects;
+ std::vector<SymbolDesc> Symbols;
// (index, filename) pairs of ELF STT_FILE symbols.
std::vector<std::pair<uint32_t, StringRef>> FileSymbols;
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml
new file mode 100644
index 000000000000..984e444b2fda
--- /dev/null
+++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml
@@ -0,0 +1,52 @@
+## Function and data symbols have different addresses so internally we treat
+## STT_NOTYPE/STT_FUNC/STT_DATA/etc the same. The DATA command may get a function
+## as result. For regular applications, the input addresses are guaranteed to be
+## related to data symbols.
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-symbolizer --obj=%t 'DATA 0x1000' 'DATA 0x2000' 'DATA 0x2002' | FileCheck %s
+
+# CHECK: func
+# CHECK-NEXT: 4096 1
+# CHECK-EMPTY:
+# CHECK-NEXT: data
+# CHECK-NEXT: 8192 2
+# CHECK-EMPTY:
+# CHECK-NEXT: notype
+# CHECK-NEXT: 8194 3
+# CHECK-EMPTY:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1000
+ Size: 1
+ - Name: .data
+ Type: SHT_PROGBITS
+ Flags: [ SHF_WRITE, SHF_ALLOC ]
+ Address: 0x2000
+ Size: 5
+Symbols:
+ - Name: func
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x1000
+ Size: 1
+ - Name: data
+ Type: STT_OBJECT
+ Section: .data
+ Binding: STB_GLOBAL
+ Value: 0x2000
+ Size: 2
+ - Name: notype
+ Section: .data
+ Binding: STB_GLOBAL
+ Value: 0x2002
+ Size: 3
More information about the llvm-commits
mailing list