[llvm] b3a778f - [llvm-profgen] Support symbol loading for debug fission

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 23 09:41:04 PST 2022


Author: wlei
Date: 2022-02-23T09:40:46-08:00
New Revision: b3a778fb5eca0e6032f3d62c03a673c2431be8a0

URL: https://github.com/llvm/llvm-project/commit/b3a778fb5eca0e6032f3d62c03a673c2431be8a0
DIFF: https://github.com/llvm/llvm-project/commit/b3a778fb5eca0e6032f3d62c03a673c2431be8a0.diff

LOG: [llvm-profgen] Support symbol loading for debug fission

Support loading debug info from split DWARF files, such as .dwo and .dwp files. This leverages the `getNonSkeletonUnitDIE(false)` API.

Add a test case to make sure all the ranges are correctly retrieved by the loader.

Reviewed By: ayermolo, hoy, wenlei

Differential Revision: https://reviews.llvm.org/D115973

Added: 
    llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe
    llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml
    llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml
    llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe
    llvm/test/tools/llvm-profgen/split-dwarf.test

Modified: 
    llvm/tools/llvm-profgen/ProfiledBinary.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe
new file mode 100755
index 0000000000000..86a7775ece4c6
Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.exe differ

diff  --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml
new file mode 100644
index 0000000000000..a3ace754518e0
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-single.o.yaml
@@ -0,0 +1,92 @@
+# Source code:
+
+# int foo() {
+#   return 1;
+# }
+#
+# int main() {
+#   foo();
+#   return 0;
+# }
+
+# Build instructions (clang version 15.0.0):
+# split-dwarf-single.o:       clang -gsplit-dwarf=single -fdebug-compilation-dir=.  test.c   -fdebug-info-for-profiling  -O0 -g -o split-dwarf-single.o -c
+# split-dwarf-single.o.yaml:  obj2yaml split-dwarf-single.o > split-dwarf-single.o.yaml
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .debug_abbrev
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         014A00101772171B25B442197625111B12067317000000
+  - Name:            .debug_info
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         2400000005000408000000000428758115ED87CF0100000000000000000001002C00000000000000
+  - Name:            .debug_str_offsets
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         0C000000050000000000000000000000
+  - Name:            .debug_str_offsets.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         1C000000050000000000000004000000080000000D000000760000007D000000
+  - Name:            .debug_str.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         666F6F00696E74006D61696E00636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420313565336538396239626162323838363862303930656539663336326161386630333233303934372900746573742E630073706C69742D64776172662D73696E676C652E6F00
+  - Name:            .debug_info.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         3900000005000508000000000428758115ED87CF01030C00040502000B00000001560000013800000002011C0000000156020005380000000301050400
+  - Name:            .debug_abbrev.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         01110125251305032576250000022E00111B1206401803253A0B3B0B49133F19000003240003253E0B0B0B000000
+  - Name:            .debug_line
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         5E0000000500080037000000010101FB0E0D00010101010000000100000101011F010000000003011F020F051E010000000000EF173AFD4B2F5E20815DE19BD24360F4040000090200000000000000000105030A4B0500BD05030AE5590208000101
+  - Name:            .debug_line_str
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         2E00746573742E6300
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .debug_abbrev
+      - Name:            .debug_info
+      - Name:            .debug_str_offsets
+      - Name:            .debug_str
+      - Name:            .debug_str_offsets.dwo
+      - Name:            .debug_str.dwo
+      - Name:            .debug_info.dwo
+      - Name:            .debug_abbrev.dwo
+      - Name:            .debug_addr
+      - Name:            .debug_line
+      - Name:            .debug_line_str
+DWARF:
+  debug_str:
+    - .
+    - split-dwarf-single.o
+  debug_addr:
+    - Length:          0x14
+      Version:         0x5
+      AddressSize:     0x8
+      Entries:
+        - {}
+        - Address:         0x10
+...

diff  --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml
new file mode 100644
index 0000000000000..688011a57a104
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.dwo.yaml
@@ -0,0 +1,52 @@
+# Source code:
+
+# int foo() {
+#   return 1;
+# }
+#
+# int main() {
+#   foo();
+#   return 0;
+# }
+
+# Build instructions (clang version 15.0.0):
+# split-dwarf-split.dwo:      clang -gsplit-dwarf=split -fdebug-compilation-dir=.  test.c  -fdebug-info-for-profiling  -O0 -g -o split-dwarf-split.o -c
+# split-dwarf-split.dwo.yaml:  obj2yaml split-dwarf-split.dwo > split-dwarf-split.dwo.yaml
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .debug_str_offsets.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         1C000000050000000000000004000000080000000D000000760000007D000000
+  - Name:            .debug_str.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         666F6F00696E74006D61696E00636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420313565336538396239626162323838363862303930656539663336326161386630333233303934372900746573742E630073706C69742D64776172662D73706C69742E64776F00
+  - Name:            .debug_info.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         3900000005000508000000000428758115ED87CF01030C00040502000B00000001560000013800000002011C0000000156020005380000000301050400
+  - Name:            .debug_abbrev.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         01110125251305032576250000022E00111B1206401803253A0B3B0B49133F19000003240003253E0B0B0B000000
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .debug_str_offsets.dwo
+      - Name:            .debug_str.dwo
+      - Name:            .debug_info.dwo
+      - Name:            .debug_abbrev.dwo
+...

diff  --git a/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe
new file mode 100755
index 0000000000000..4d0ff5700b1a4
Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/split-dwarf-split.exe differ

diff  --git a/llvm/test/tools/llvm-profgen/split-dwarf.test b/llvm/test/tools/llvm-profgen/split-dwarf.test
new file mode 100644
index 0000000000000..09daea7f033dc
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/split-dwarf.test
@@ -0,0 +1,50 @@
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: cd %t
+
+; RUN: echo -e "0\n0" > %t.prof
+
+; Test -gsplit-dwarf=single
+; RUN: cp %S/Inputs/split-dwarf-single.exe %t/split-dwarf-single.exe
+; RUN: yaml2obj %S/Inputs/split-dwarf-single.o.yaml -o %t/split-dwarf-single.o
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/split-dwarf-single.exe --output=%t1 --fill-zero-for-all-funcs
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-SPLIT-DWARF
+
+; Test -gsplit-dwarf=split
+; RUN: cp %S/Inputs/split-dwarf-split.exe %t/split-dwarf-split.exe
+; RUN: yaml2obj %S/Inputs/split-dwarf-split.dwo.yaml -o %t/split-dwarf-split.dwo
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/split-dwarf-split.exe --output=%t2 --fill-zero-for-all-funcs
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-SPLIT-DWARF
+; Test --dwp
+; RUN: llvm-dwp %t/split-dwarf-split.dwo -o %t/split-dwarf-split.dwp
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --dwp=%t/split-dwarf-split.dwp --binary=%t/split-dwarf-split.exe --output=%t3 --fill-zero-for-all-funcs
+; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-SPLIT-DWARF
+
+; Make sure that all function ranges are well retrieved and have been populated with zero.
+
+; CHECK-SPLIT-DWARF:       foo:0:0
+; CHECK-SPLIT-DWARF-NEXT:   0: 0
+; CHECK-SPLIT-DWARF-NEXT:   1: 0
+; CHECK-SPLIT-DWARF-NEXT:  main:0:0
+; CHECK-SPLIT-DWARF-NEXT:   0: 0
+; CHECK-SPLIT-DWARF-NEXT:   1: 0
+; CHECK-SPLIT-DWARF-NEXT:   2: 0
+
+
+; Build instructions:
+; split-dwarf-single.o:       clang -gsplit-dwarf=single -fdebug-compilation-dir=.  test.c   -fdebug-info-for-profiling  -O0 -g -o split-dwarf-single.o -c
+; split-dwarf-single.exe:     clang -fdebug-compilation-dir=.  split-dwarf-single.o -o split-dwarf-single.exe  -fdebug-info-for-profiling  -O0 -g
+
+; split-dwarf-split.dwo:      clang -gsplit-dwarf=split -fdebug-compilation-dir=.  test.c  -fdebug-info-for-profiling  -O0 -g -o split-dwarf-split.o -c
+; split-dwarf-split.exe:      clang -fdebug-compilation-dir=.  split-dwarf-split.o -o split-dwarf-split.exe  -fdebug-info-for-profiling  -O0 -g
+
+; Source code:
+
+int foo() {
+  return 1;
+}
+
+int main() {
+  foo();
+  return 0;
+}

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 3430b030c01a8..dca69064b5d8f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -43,6 +43,11 @@ static cl::opt<bool> UseDwarfCorrelation(
     cl::desc("Use dwarf for profile correlation even when binary contains "
              "pseudo probe."));
 
+static cl::opt<std::string>
+    DWPPath("dwp", cl::init(""), cl::ZeroOrMore,
+            cl::desc("Path of .dwp file. When not specified, it will be "
+                     "<binary>.dwp in the same directory as the main binary."));
+
 static cl::list<std::string> DisassembleFunctions(
     "disassemble-functions", cl::CommaSeparated,
     cl::desc("List of functions to print disassembly for. Accept demangled "
@@ -610,69 +615,94 @@ void ProfiledBinary::checkUseFSDiscriminator(
   }
 }
 
-void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
-  auto DebugContext = llvm::DWARFContext::create(Obj);
-  if (!DebugContext)
-    exitWithError("Misssing debug info.", Path);
+void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
+  for (const auto &DieInfo : CompilationUnit.dies()) {
+    llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
 
-  for (const auto &CompilationUnit : DebugContext->compile_units()) {
-    for (const auto &DieInfo : CompilationUnit->dies()) {
-      llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo);
+    if (!Die.isSubprogramDIE())
+      continue;
+    auto Name = Die.getName(llvm::DINameKind::LinkageName);
+    if (!Name)
+      Name = Die.getName(llvm::DINameKind::ShortName);
+    if (!Name)
+      continue;
 
-      if (!Die.isSubprogramDIE())
-        continue;
-      auto Name = Die.getName(llvm::DINameKind::LinkageName);
-      if (!Name)
-        Name = Die.getName(llvm::DINameKind::ShortName);
-      if (!Name)
-        continue;
+    auto RangesOrError = Die.getAddressRanges();
+    if (!RangesOrError)
+      continue;
+    const DWARFAddressRangesVector &Ranges = RangesOrError.get();
 
-      auto RangesOrError = Die.getAddressRanges();
-      if (!RangesOrError)
-        continue;
-      const DWARFAddressRangesVector &Ranges = RangesOrError.get();
+    if (Ranges.empty())
+      continue;
+
+    // Different DWARF symbols can have same function name, search or create
+    // BinaryFunction indexed by the name.
+    auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
+    auto &Func = Ret.first->second;
+    if (Ret.second)
+      Func.FuncName = Ret.first->first;
 
-      if (Ranges.empty())
+    for (const auto &Range : Ranges) {
+      uint64_t FuncStart = Range.LowPC;
+      uint64_t FuncSize = Range.HighPC - FuncStart;
+
+      if (FuncSize == 0 || FuncStart < getPreferredBaseAddress())
         continue;
 
-      // Different DWARF symbols can have same function name, search or create
-      // BinaryFunction indexed by the name.
-      auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
-      auto &Func = Ret.first->second;
-      if (Ret.second)
-        Func.FuncName = Ret.first->first;
-
-      for (const auto &Range : Ranges) {
-        uint64_t FuncStart = Range.LowPC;
-        uint64_t FuncSize = Range.HighPC - FuncStart;
-
-        if (FuncSize == 0 || FuncStart < getPreferredBaseAddress())
-          continue;
-
-        uint64_t StartOffset = FuncStart - getPreferredBaseAddress();
-        uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress();
-
-        // We may want to know all ranges for one function. Here group the
-        // ranges and store them into BinaryFunction.
-        Func.Ranges.emplace_back(StartOffset, EndOffset);
-
-        auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange());
-        if (R.second) {
-          FuncRange &FRange = R.first->second;
-          FRange.Func = &Func;
-          FRange.StartOffset = StartOffset;
-          FRange.EndOffset = EndOffset;
-        } else {
-          WithColor::warning()
-              << "Duplicated symbol start address at "
-              << format("%8" PRIx64, StartOffset + getPreferredBaseAddress())
-              << " " << R.first->second.getFuncName() << " and " << Name
-              << "\n";
-        }
+      uint64_t StartOffset = FuncStart - getPreferredBaseAddress();
+      uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress();
+
+      // We may want to know all ranges for one function. Here group the
+      // ranges and store them into BinaryFunction.
+      Func.Ranges.emplace_back(StartOffset, EndOffset);
+
+      auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange());
+      if (R.second) {
+        FuncRange &FRange = R.first->second;
+        FRange.Func = &Func;
+        FRange.StartOffset = StartOffset;
+        FRange.EndOffset = EndOffset;
+      } else {
+        WithColor::warning()
+            << "Duplicated symbol start address at "
+            << format("%8" PRIx64, StartOffset + getPreferredBaseAddress())
+            << " " << R.first->second.getFuncName() << " and " << Name << "\n";
       }
     }
   }
-  assert(!StartOffset2FuncRangeMap.empty() && "Misssing debug info.");
+}
+
+void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
+  auto DebugContext = llvm::DWARFContext::create(
+      Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
+  if (!DebugContext)
+    exitWithError("Error creating the debug info context", Path);
+
+  for (const auto &CompilationUnit : DebugContext->compile_units())
+    loadSymbolsFromDWARFUnit(*CompilationUnit.get());
+
+  // Handles DWO sections that can either be in .o, .dwo or .dwp files.
+  for (const auto &CompilationUnit : DebugContext->compile_units()) {
+    DWARFUnit *const DwarfUnit = CompilationUnit.get();
+    if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
+      DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
+      if (!DWOCU->isDWOUnit()) {
+        std::string DWOName = dwarf::toString(
+            DwarfUnit->getUnitDIE().find(
+                {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
+            "");
+        WithColor::warning()
+            << "DWO debug information for " << DWOName
+            << " was not loaded. Please check the .o, .dwo or .dwp path.\n";
+        continue;
+      }
+      loadSymbolsFromDWARFUnit(*DWOCU);
+    }
+  }
+
+  if (BinaryFunctions.empty())
+    WithColor::warning() << "Loading of DWARF info completed, but no binary "
+                            "functions have been retrieved.\n";
 }
 
 void ProfiledBinary::populateSymbolListFromDWARF(
@@ -689,6 +719,7 @@ void ProfiledBinary::setupSymbolizer() {
   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
   SymbolizerOpts.UseSymbolTable = false;
   SymbolizerOpts.RelativeAddresses = false;
+  SymbolizerOpts.DWPName = DWPPath;
   Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
 }
 

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 33b0b81fb0468..d359f79749d69 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -290,6 +290,9 @@ class ProfiledBinary {
   // Load debug info of subprograms from DWARF section.
   void loadSymbolsFromDWARF(ObjectFile &Obj);
 
+  // Load debug info from DWARF unit.
+  void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit);
+
   // A function may be spilt into multiple non-continuous address ranges. We use
   // this to set whether start offset of a function is the real entry of the
   // function and also set false to the non-function label.


        


More information about the llvm-commits mailing list