[llvm] Modify llvm-gsymutil to ignore invalid file indexes. (PR #70876)

Greg Clayton via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 14:36:06 PDT 2023


https://github.com/clayborg updated https://github.com/llvm/llvm-project/pull/70876

>From 1a7b9a032f1c699d7eea217c214ffa00a2b4aafb Mon Sep 17 00:00:00 2001
From: Greg Clayton <gclayton at fb.com>
Date: Mon, 30 Oct 2023 17:14:19 -0700
Subject: [PATCH] Modify llvm-gsymutil to ignore invalid file indexes.

DWARF produced by LTO and BOLT can sometimes be broken where file indexes are beyond the end of the line table's file list in the prologue. This patch allows llvm-gsymutil to convert this DWARF without crashing, and emits errors when:
- line table contains entries with an invalid file index (line entry will be removed)
- inline functions that have invalid DW_AT_call_file file indexes
- when there are no line table entries for a function and we fall back to making a single line table entry from the functions DW_AT_decl_file/DW_AT_decl_line attributes, we make sure the DW_AT_decl_file attribute is valid before emitting it.
---
 llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp |  63 +++-
 llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp   | 356 +++++++++++++++++++
 2 files changed, 405 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index d720c1e33495515..760ab3e1f8fbd61 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -65,10 +65,10 @@ struct llvm::gsym::CUInfo {
   /// the first client that asks for a compile unit file index will end up
   /// doing the conversion, and subsequent clients will get the cached GSYM
   /// index.
-  uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
-    if (!LineTable)
-      return 0;
-    assert(DwarfFileIdx < FileCache.size());
+  std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
+                                               uint32_t DwarfFileIdx) {
+    if (!LineTable || DwarfFileIdx >= FileCache.size())
+      return std::nullopt;
     uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
     if (GsymFileIdx != UINT32_MAX)
       return GsymFileIdx;
@@ -272,14 +272,24 @@ static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI,
 
     if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
       II.Name = *NameIndex;
-    II.CallFile = CUI.DWARFToGSYMFileIndex(
-        Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
-    II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
-    // parse all children and append to parent
-    for (DWARFDie ChildDie : Die.children())
-      parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
-                      AllInlineRanges, WarnIfEmpty);
-    Parent.Children.emplace_back(std::move(II));
+    const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+        Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
+    std::optional<uint32_t> OptGSymFileIdx =
+        CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
+    if (OptGSymFileIdx) {
+      II.CallFile = OptGSymFileIdx.value();
+      II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
+      // parse all children and append to parent
+      for (DWARFDie ChildDie : Die.children())
+        parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
+                        AllInlineRanges, WarnIfEmpty);
+      Parent.Children.emplace_back(std::move(II));
+    } else if (Log) {
+      *Log << "error: inlined function DIE at " << HEX32(Die.getOffset())
+           << " has an invalid file index " << DwarfFileIdx
+           << " in its DW_AT_call_file attribute, this inline entry and all "
+           << "children will be removed.\n";
+    }
     return;
   }
   if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
@@ -306,8 +316,20 @@ static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
     // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
     std::string FilePath = Die.getDeclFile(
         DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
-    if (FilePath.empty())
+    if (FilePath.empty()) {
+      // If we had a DW_AT_decl_file, but got no file then we need to emit a
+      // warning.
+      if (Log) {
+        const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+            Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+        *Log << "error: function DIE at " << HEX32(Die.getOffset())
+             << " has an invalid file index " << DwarfFileIdx
+             << " in its DW_AT_decl_file attribute, unable to create a single "
+             << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
+             << "attributes.\n";
+      }
       return;
+    }
     if (auto Line =
             dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
       LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
@@ -322,7 +344,20 @@ static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
   for (uint32_t RowIndex : RowVector) {
     // Take file number and line/column from the row.
     const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
-    const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
+    std::optional<uint32_t> OptFileIdx =
+        CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
+    if (!OptFileIdx) {
+      if (Log) {
+        *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
+             << "a line entry with invalid DWARF file index, this entry will "
+             << "be removed:\n";
+        Row.dumpTableHeader(*Log, /*Indent=*/0);
+        Row.dump(*Log);
+        *Log << "\n";
+      }
+      continue;
+    }
+    const uint32_t FileIdx = OptFileIdx.value();
     uint64_t RowAddress = Row.Address.Address;
     // Watch out for a RowAddress that is in the middle of a line table entry
     // in the DWARF. If we pass an address in between two line table entries
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index ad81a2fcd16441a..a2f26ed28728966 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -4157,3 +4157,359 @@ TEST(GSYMTest, TestEmptyLinkageName) {
   // Make sure we don't see spurious errors in the output:
   EXPECT_TRUE(errors.find("error:") == std::string::npos);
 }
+
+TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
+  // Test that llvm-gsymutil can handle invalid file indexes in the following
+  // cases:
+  //  - In line entries in the line table
+  //  - When parsing inline entries that have a DW_AT_call_file
+  //  - When parsing function dies with no line table entries and it tries to
+  //    use the DW_AT_decl_file
+  //
+  //
+  // 0x0000000b: DW_TAG_compile_unit
+  //               DW_AT_name        ("/tmp/main.cpp")
+  //               DW_AT_language    (DW_LANG_C)
+  //               DW_AT_stmt_list   (0x00000000)
+  //
+  // 0x00000015:   DW_TAG_subprogram
+  //                 DW_AT_name      ("foo")
+  //                 DW_AT_low_pc    (0x0000000000001000)
+  //                 DW_AT_high_pc   (0x0000000000001050)
+  //
+  // 0x0000002a:     DW_TAG_inlined_subroutine
+  //                   DW_AT_name    ("inline_with_invalid_call_file")
+  //                   DW_AT_low_pc  (0x0000000000001010)
+  //                   DW_AT_high_pc (0x0000000000001020)
+  //                   DW_AT_call_file       (0x0000000a)
+  //                   DW_AT_call_line       (11)
+  //
+  // 0x00000047:       DW_TAG_inlined_subroutine
+  //                     DW_AT_name
+  //                     ("inline_inside_parent_with_invalid_call_file")
+  //                     DW_AT_low_pc        (0x0000000000001010)
+  //                     DW_AT_high_pc       (0x0000000000001015)
+  //                     DW_AT_call_file     ("/tmp/main.cpp")
+  //                     DW_AT_call_line     (12)
+  //
+  // 0x00000064:       NULL
+  //
+  // 0x00000065:     DW_TAG_inlined_subroutine
+  //                   DW_AT_name    ("inline_with_valid_call_file")
+  //                   DW_AT_low_pc  (0x0000000000001020)
+  //                   DW_AT_high_pc (0x0000000000001030)
+  //                   DW_AT_call_file       ("/tmp/main.cpp")
+  //                   DW_AT_call_line       (13)
+  //
+  // 0x00000082:       DW_TAG_inlined_subroutine
+  //                     DW_AT_name
+  //                     ("inline_inside_parent_with_valid_call_file")
+  //                     DW_AT_low_pc        (0x0000000000001020)
+  //                     DW_AT_high_pc       (0x0000000000001025)
+  //                     DW_AT_call_file     ("/tmp/main.cpp")
+  //                     DW_AT_call_line     (14)
+  //
+  // 0x0000009f:       NULL
+  //
+  // 0x000000a0:     NULL
+  //
+  // 0x000000a1:   DW_TAG_subprogram
+  //                 DW_AT_name      ("func_with_valid_decl_file")
+  //                 DW_AT_decl_file ("/tmp/main.cpp")
+  //                 DW_AT_decl_line (20)
+  //                 DW_AT_low_pc    (0x0000000000002000)
+  //                 DW_AT_high_pc   (0x0000000000002050)
+  //
+  // 0x000000b8:   DW_TAG_subprogram
+  //                 DW_AT_name      ("func_with_invalid_decl_file")
+  //                 DW_AT_decl_file (0x0a)
+  //                 DW_AT_decl_line (20)
+  //                 DW_AT_low_pc    (0x0000000000003000)
+  //                 DW_AT_high_pc   (0x0000000000003050)
+  //
+  // 0x000000cf:   NULL
+  //
+  // The table looks has an entry at address 0x0000000000001010 that has an
+  // invalid file index that needs to be removed.
+  //
+  // Address            Line   Column File   ISA Discriminator Flags
+  // ---------- ------ ------ ------ --- ------------- -------------
+  // 0x00001000     10      0      1   0             0  is_stmt
+  // 0x00001010     11      0     10   0             0  is_stmt
+  // 0x00001020     11      0      1   0             0  is_stmt
+  // 0x00001030     12      0      1   0             0  is_stmt
+  // 0x00001050     12      0      1   0             0  is_stmt end_sequence
+
+  StringRef yamldata = R"(
+  debug_str:
+    - ''
+    - '/tmp/main.cpp'
+    - foo
+    - inline_with_invalid_call_file
+    - inline_inside_parent_with_invalid_call_file
+    - inline_with_valid_call_file
+    - inline_inside_parent_with_valid_call_file
+    - func_with_valid_decl_file
+    - func_with_invalid_decl_file
+  debug_abbrev:
+    - ID:              0
+      Table:
+        - Code:            0x1
+          Tag:             DW_TAG_compile_unit
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_language
+              Form:            DW_FORM_udata
+            - Attribute:       DW_AT_stmt_list
+              Form:            DW_FORM_sec_offset
+        - Code:            0x2
+          Tag:             DW_TAG_subprogram
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_addr
+        - Code:            0x3
+          Tag:             DW_TAG_inlined_subroutine
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_call_file
+              Form:            DW_FORM_data4
+            - Attribute:       DW_AT_call_line
+              Form:            DW_FORM_data4
+        - Code:            0x4
+          Tag:             DW_TAG_inlined_subroutine
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_call_file
+              Form:            DW_FORM_data4
+            - Attribute:       DW_AT_call_line
+              Form:            DW_FORM_data4
+        - Code:            0x5
+          Tag:             DW_TAG_subprogram
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_decl_file
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_decl_line
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_addr
+  debug_info:
+    - Length:          0xCC
+      Version:         4
+      AbbrevTableID:   0
+      AbbrOffset:      0x0
+      AddrSize:        8
+      Entries:
+        - AbbrCode:        0x1
+          Values:
+            - Value:           0x1
+            - Value:           0x2
+            - Value:           0x0
+        - AbbrCode:        0x2
+          Values:
+            - Value:           0xF
+            - Value:           0x1000
+            - Value:           0x1050
+        - AbbrCode:        0x3
+          Values:
+            - Value:           0x13
+            - Value:           0x1010
+            - Value:           0x1020
+            - Value:           0xA
+            - Value:           0xB
+        - AbbrCode:        0x4
+          Values:
+            - Value:           0x31
+            - Value:           0x1010
+            - Value:           0x1015
+            - Value:           0x1
+            - Value:           0xC
+        - AbbrCode:        0x0
+        - AbbrCode:        0x3
+          Values:
+            - Value:           0x5D
+            - Value:           0x1020
+            - Value:           0x1030
+            - Value:           0x1
+            - Value:           0xD
+        - AbbrCode:        0x4
+          Values:
+            - Value:           0x79
+            - Value:           0x1020
+            - Value:           0x1025
+            - Value:           0x1
+            - Value:           0xE
+        - AbbrCode:        0x0
+        - AbbrCode:        0x0
+        - AbbrCode:        0x5
+          Values:
+            - Value:           0xA3
+            - Value:           0x1
+            - Value:           0x14
+            - Value:           0x2000
+            - Value:           0x2050
+        - AbbrCode:        0x5
+          Values:
+            - Value:           0xBD
+            - Value:           0xA
+            - Value:           0x14
+            - Value:           0x3000
+            - Value:           0x3050
+        - AbbrCode:        0x0
+  debug_line:
+    - Length:          78
+      Version:         2
+      PrologueLength:  36
+      MinInstLength:   1
+      DefaultIsStmt:   1
+      LineBase:        251
+      LineRange:       14
+      OpcodeBase:      13
+      StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+      IncludeDirs:
+        - '/tmp'
+      Files:
+        - Name:            main.cpp
+          DirIdx:          1
+          ModTime:         0
+          Length:          0
+      Opcodes:
+        - Opcode:          DW_LNS_extended_op
+          ExtLen:          9
+          SubOpcode:       DW_LNE_set_address
+          Data:            4096
+        - Opcode:          DW_LNS_advance_line
+          SData:           9
+          Data:            0
+        - Opcode:          DW_LNS_copy
+          Data:            0
+        - Opcode:          DW_LNS_advance_pc
+          Data:            16
+        - Opcode:          DW_LNS_set_file
+          Data:            10
+        - Opcode:          DW_LNS_advance_line
+          SData:           1
+          Data:            0
+        - Opcode:          DW_LNS_copy
+          Data:            0
+        - Opcode:          DW_LNS_advance_pc
+          Data:            16
+        - Opcode:          DW_LNS_set_file
+          Data:            1
+        - Opcode:          DW_LNS_copy
+          Data:            0
+        - Opcode:          DW_LNS_advance_pc
+          Data:            16
+        - Opcode:          DW_LNS_advance_line
+          SData:           1
+          Data:            0
+        - Opcode:          DW_LNS_copy
+          Data:            0
+        - Opcode:          DW_LNS_advance_pc
+          Data:            32
+        - Opcode:          DW_LNS_extended_op
+          ExtLen:          1
+          SubOpcode:       DW_LNE_end_sequence
+          Data:            0
+  )";
+  auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+  ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+  std::unique_ptr<DWARFContext> DwarfContext =
+      DWARFContext::create(*ErrOrSections, 8);
+  ASSERT_TRUE(DwarfContext.get() != nullptr);
+  std::string errors;
+  raw_string_ostream OS(errors);
+  GsymCreator GC;
+  DwarfTransformer DT(*DwarfContext, GC);
+  const uint32_t ThreadCount = 1;
+  ASSERT_THAT_ERROR(DT.convert(ThreadCount, &OS), Succeeded());
+  ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded());
+  OS.flush();
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  const auto ByteOrder = llvm::endianness::native;
+  FileWriter FW(OutStrm, ByteOrder);
+  ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
+  Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+  ASSERT_THAT_EXPECTED(GR, Succeeded());
+  // There should be one function in our GSYM.
+  EXPECT_EQ(GR->getNumAddresses(), 3u);
+  // Verify "foo" is present and has a line table and no inline info.
+  auto ExpFI = GR->getFunctionInfo(0x1000);
+  ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+  ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x1050));
+  StringRef FuncName = GR->getString(ExpFI->Name);
+  EXPECT_EQ(FuncName, "foo");
+
+  EXPECT_TRUE(ExpFI->OptLineTable.has_value());
+  // Make sure we only have 3 entries to show we removed the line entry with
+  // the invalid file index whose address is 0x0000000000001010.
+  ASSERT_EQ(ExpFI->OptLineTable->size(), 3u);
+  EXPECT_TRUE(ExpFI->Inline.has_value());
+
+  // Make sure that we only have one inline function, not two. We remove one of
+  // the inline functions because it has an invalid DW_AT_call_file attribute.
+  ASSERT_EQ(ExpFI->Inline->Children.size(), 1u);
+  StringRef InlineName = GR->getString(ExpFI->Inline->Children[0].Name);
+  EXPECT_EQ(InlineName, "inline_with_valid_call_file");
+
+  ExpFI = GR->getFunctionInfo(0x0000000000002000);
+  ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+  ASSERT_EQ(ExpFI->Range, AddressRange(0x2000, 0x2050));
+  FuncName = GR->getString(ExpFI->Name);
+  EXPECT_EQ(FuncName, "func_with_valid_decl_file");
+  EXPECT_FALSE(ExpFI->Inline.has_value());
+  // Make sure we only have 1 entry in the line table which indicates we were
+  // able to parse the DW_AT_decl_file/DW_AT_decl_line correctly.
+  EXPECT_TRUE(ExpFI->OptLineTable.has_value());
+  ASSERT_EQ(ExpFI->OptLineTable->size(), 1u);
+
+  ExpFI = GR->getFunctionInfo(0x0000000000003000);
+  ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+  ASSERT_EQ(ExpFI->Range, AddressRange(0x3000, 0x3050));
+  FuncName = GR->getString(ExpFI->Name);
+  EXPECT_EQ(FuncName, "func_with_invalid_decl_file");
+  EXPECT_FALSE(ExpFI->Inline.has_value());
+  // Make sure we only no line table because there are no line entries in the
+  // line table and the DW_AT_decl_file attribute was invalid so we were not
+  // able to parse the DW_AT_decl_file/DW_AT_decl_line correctly.
+  EXPECT_FALSE(ExpFI->OptLineTable.has_value());
+
+  // Make sure we don't see spurious errors in the output:
+  std::vector<std::string> ExpectedLogErrors = {
+      "error: function DIE at 0x00000015 has a line entry with invalid DWARF "
+      "file index, this entry will be removed:",
+      "error: inlined function DIE at 0x0000002a has an invalid file index 10 "
+      "in its DW_AT_call_file attribute, this inline entry and all children "
+      "will be removed.",
+      "error: function DIE at 0x000000b8 has an invalid file index 10 in its "
+      "DW_AT_decl_file attribute, unable to create a single line entry from "
+      "the DW_AT_decl_file/DW_AT_decl_line attributes."};
+  // Make sure all expected errors are in the error stream for the two invalid
+  // inlined functions that we removed due to invalid range scoping.
+  for (const auto &Error : ExpectedLogErrors)
+    EXPECT_TRUE(errors.find(Error) != std::string::npos);
+}



More information about the llvm-commits mailing list