[llvm] [BOLT][DWARF] Fix handling .debug_str_offsets for type units (PR #75522)

Alexander Yermolovich via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 14 16:54:12 PST 2023


https://github.com/ayermolo updated https://github.com/llvm/llvm-project/pull/75522

>From 052af9ef533a2dff3aec87707ffe97304973fc65 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 14 Dec 2023 11:40:47 -0800
Subject: [PATCH 1/3] [BOLT][DWARF] Fix handling .debug_str_offsets for type
 units

There was an assumption that TUs and CUs share a .debug_str_offsets contribution.
For ThinLTO builds it is not the case. Changed so that we parse contributions
for TUs also, and did some refactoring so that we don't re-parse contributions
that were not modified.
---
 bolt/include/bolt/Core/DIEBuilder.h           |   4 +-
 bolt/include/bolt/Core/DebugData.h            |   8 +-
 bolt/lib/Core/DIEBuilder.cpp                  |  10 +-
 bolt/lib/Core/DebugData.cpp                   |  39 +--
 bolt/lib/Rewrite/DWARFRewriter.cpp            |   7 +-
 bolt/test/X86/Inputs/dwarf5-basic-cu.s        | 156 ++++++++++
 bolt/test/X86/Inputs/dwarf5-types-no-cu.s     | 281 ++++++++++++++++++
 ...arf5-type-unit-no-cu-str-offset-table.test |  58 ++++
 8 files changed, 532 insertions(+), 31 deletions(-)
 create mode 100644 bolt/test/X86/Inputs/dwarf5-basic-cu.s
 create mode 100644 bolt/test/X86/Inputs/dwarf5-types-no-cu.s
 create mode 100644 bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test

diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h
index 1c5252142d4ebf..f89084065aae1c 100644
--- a/bolt/include/bolt/Core/DIEBuilder.h
+++ b/bolt/include/bolt/Core/DIEBuilder.h
@@ -33,6 +33,7 @@ namespace llvm {
 
 namespace bolt {
 class DIEStreamer;
+class DebugStrOffsetsWriter;
 
 class DIEBuilder {
   friend DIEStreamer;
@@ -266,7 +267,8 @@ class DIEBuilder {
   ProcessingType getCurrentProcessingState() { return getState().Type; }
 
   /// Constructs IR for Type Units.
-  void buildTypeUnits(const bool Init = true);
+  void buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter = nullptr,
+                      const bool Init = true);
   /// Constructs IR for all the CUs.
   void buildCompileUnits(const bool Init = true);
   /// Constructs IR for CUs in a vector.
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 9f0dd88b115fcc..31a636ba2ce653 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -436,10 +436,6 @@ class DebugStrOffsetsWriter {
     StrOffsetsStream = std::make_unique<raw_svector_ostream>(*StrOffsetsBuffer);
   }
 
-  /// Initializes Buffer and Stream.
-  void initialize(const DWARFSection &StrOffsetsSection,
-                  const std::optional<StrOffsetsContributionDescriptor> Contr);
-
   /// Update Str offset in .debug_str in .debug_str_offsets.
   void updateAddressMap(uint32_t Index, uint32_t Address);
 
@@ -455,9 +451,13 @@ class DebugStrOffsetsWriter {
   }
 
 private:
+  /// Initializes Buffer and Stream.
+  void initialize(DWARFUnit &Unit);
+
   std::unique_ptr<DebugStrOffsetsBufferVector> StrOffsetsBuffer;
   std::unique_ptr<raw_svector_ostream> StrOffsetsStream;
   std::map<uint32_t, uint32_t> IndexToAddressMap;
+  std::vector<uint32_t> StrOffsets;
   std::unordered_map<uint64_t, uint64_t> ProcessedBaseOffsets;
   bool StrOffsetSectionWasModified = false;
 };
diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp
index b809b2935ee9eb..caa5ecbea521dc 100644
--- a/bolt/lib/Core/DIEBuilder.cpp
+++ b/bolt/lib/Core/DIEBuilder.cpp
@@ -189,7 +189,8 @@ static unsigned int getCUNum(DWARFContext *DwarfContext, bool IsDWO) {
   return CUNum;
 }
 
-void DIEBuilder::buildTypeUnits(const bool Init) {
+void DIEBuilder::buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter,
+                                const bool Init) {
   if (Init)
     BuilderState.reset(new State());
 
@@ -229,8 +230,11 @@ void DIEBuilder::buildTypeUnits(const bool Init) {
     registerUnit(*DU.get(), false);
   }
 
-  for (DWARFUnit *DU : getState().DWARF5TUVector)
+  for (DWARFUnit *DU : getState().DWARF5TUVector) {
     constructFromUnit(*DU);
+    if (StrOffsetWriter)
+      StrOffsetWriter->finalizeSection(*DU, *this);
+  }
 }
 
 void DIEBuilder::buildCompileUnits(const bool Init) {
@@ -280,7 +284,7 @@ void DIEBuilder::buildCompileUnits(const std::vector<DWARFUnit *> &CUs) {
 void DIEBuilder::buildDWOUnit(DWARFUnit &U) {
   BuilderState.release();
   BuilderState = std::make_unique<State>();
-  buildTypeUnits(false);
+  buildTypeUnits(nullptr, false);
   getState().Type = ProcessingType::CUs;
   registerUnit(U, false);
   constructFromUnit(U);
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index dcf3a36e35e3fc..57547791552bab 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -851,35 +851,34 @@ std::string SimpleBinaryPatcher::patchBinary(StringRef BinaryContents) {
   return BinaryContentsStr;
 }
 
-void DebugStrOffsetsWriter::initialize(
-    const DWARFSection &StrOffsetsSection,
-    const std::optional<StrOffsetsContributionDescriptor> Contr) {
+void DebugStrOffsetsWriter::initialize(DWARFUnit &Unit) {
+  if (Unit.getVersion() < 5)
+    return;
+  const DWARFSection StrOffsetsSection = Unit.getStringOffsetSection();
+  const std::optional<StrOffsetsContributionDescriptor> Contr =
+      Unit.getStringOffsetsTableContribution();
   if (!Contr)
     return;
-
   const uint8_t DwarfOffsetByteSize = Contr->getDwarfOffsetByteSize();
   assert(DwarfOffsetByteSize == 4 &&
          "Dwarf String Offsets Byte Size is not supported.");
-  uint32_t Index = 0;
+  StrOffsets.reserve(Contr->Size);
   for (uint64_t Offset = 0; Offset < Contr->Size; Offset += DwarfOffsetByteSize)
-    IndexToAddressMap[Index++] = support::endian::read32le(
-        StrOffsetsSection.Data.data() + Contr->Base + Offset);
+    StrOffsets.push_back(support::endian::read32le(
+        StrOffsetsSection.Data.data() + Contr->Base + Offset));
 }
 
 void DebugStrOffsetsWriter::updateAddressMap(uint32_t Index, uint32_t Address) {
-  assert(IndexToAddressMap.count(Index) > 0 && "Index is not found.");
   IndexToAddressMap[Index] = Address;
   StrOffsetSectionWasModified = true;
 }
 
 void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit,
                                             DIEBuilder &DIEBldr) {
-  if (IndexToAddressMap.empty())
-    return;
-
   std::optional<AttrInfo> AttrVal =
       findAttributeInfo(Unit.getUnitDIE(), dwarf::DW_AT_str_offsets_base);
-  assert(AttrVal && "DW_AT_str_offsets_base not present.");
+  if (!AttrVal)
+    return;
   std::optional<uint64_t> Val = AttrVal->V.getAsSectionOffset();
   assert(Val && "DW_AT_str_offsets_base Value not present.");
   DIE &Die = *DIEBldr.getUnitDIEbyUnit(Unit);
@@ -888,11 +887,14 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit,
   auto RetVal = ProcessedBaseOffsets.find(*Val);
   // Handling re-use of str-offsets section.
   if (RetVal == ProcessedBaseOffsets.end() || StrOffsetSectionWasModified) {
+    initialize(Unit);
+    // Update String Offsets that were modified.
+    for (const auto &Entry : IndexToAddressMap)
+      StrOffsets[Entry.first] = Entry.second;
     // Writing out the header for each section.
-    support::endian::write(
-        *StrOffsetsStream,
-        static_cast<uint32_t>(IndexToAddressMap.size() * 4 + 4),
-        llvm::endianness::little);
+    support::endian::write(*StrOffsetsStream,
+                           static_cast<uint32_t>(StrOffsets.size() * 4 + 4),
+                           llvm::endianness::little);
     support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(5),
                            llvm::endianness::little);
     support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(0),
@@ -904,8 +906,8 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit,
       DIEBldr.replaceValue(&Die, dwarf::DW_AT_str_offsets_base,
                            StrListBaseAttrInfo.getForm(),
                            DIEInteger(BaseOffset));
-    for (const auto &Entry : IndexToAddressMap)
-      support::endian::write(*StrOffsetsStream, Entry.second,
+    for (const uint32_t Offset : StrOffsets)
+      support::endian::write(*StrOffsetsStream, Offset,
                              llvm::endianness::little);
   } else {
     DIEBldr.replaceValue(&Die, dwarf::DW_AT_str_offsets_base,
@@ -915,6 +917,7 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit,
 
   StrOffsetSectionWasModified = false;
   IndexToAddressMap.clear();
+  StrOffsets.clear();
 }
 
 void DebugStrWriter::create() {
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 1cc07c1cc9f76a..05fb3e8fafe2f7 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -554,7 +554,7 @@ void DWARFRewriter::addStringHelper(DIEBuilder &DIEBldr, DIE &Die,
                                     const DWARFUnit &Unit,
                                     DIEValue &DIEAttrInfo, StringRef Str) {
   uint32_t NewOffset = StrWriter->addString(Str);
-  if (Unit.getVersion() == 5) {
+  if (Unit.getVersion() >= 5) {
     StrOffstsWriter->updateAddressMap(DIEAttrInfo.getDIEInteger().getValue(),
                                       NewOffset);
     return;
@@ -696,9 +696,6 @@ void DWARFRewriter::updateDebugInfo() {
     std::optional<DWARFUnit *> SplitCU;
     std::optional<uint64_t> RangesBase;
     std::optional<uint64_t> DWOId = Unit->getDWOId();
-    if (Unit->getVersion() >= 5)
-      StrOffstsWriter->initialize(Unit->getStringOffsetSection(),
-                                  Unit->getStringOffsetsTableContribution());
     if (DWOId)
       SplitCU = BC.getDWOCU(*DWOId);
     DebugLocWriter *DebugLocWriter = createRangeLocList(*Unit);
@@ -753,7 +750,7 @@ void DWARFRewriter::updateDebugInfo() {
   };
 
   DIEBuilder DIEBlder(BC.DwCtx.get());
-  DIEBlder.buildTypeUnits();
+  DIEBlder.buildTypeUnits(StrOffstsWriter.get());
   SmallVector<char, 20> OutBuffer;
   std::unique_ptr<raw_svector_ostream> ObjOS =
       std::make_unique<raw_svector_ostream>(OutBuffer);
diff --git a/bolt/test/X86/Inputs/dwarf5-basic-cu.s b/bolt/test/X86/Inputs/dwarf5-basic-cu.s
new file mode 100644
index 00000000000000..9a7dcadbdf49eb
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-basic-cu.s
@@ -0,0 +1,156 @@
+	.text
+	.file	"main.cpp"
+	.globl	main                            # -- Begin function main
+	.p2align	4, 0x90
+	.type	main,@function
+main:                                   # @main
+.Lfunc_begin0:
+	.file	0 "/home" "main.cpp" md5 0xbb12fec8d002b1f0e06f7dee4604c6cc
+	.loc	0 1 0                           # main.cpp:1:0
+	.cfi_startproc
+# %bb.0:                                # %entry
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+	movl	$0, -4(%rbp)
+.Ltmp0:
+	.loc	0 2 3 prologue_end              # main.cpp:2:3
+	xorl	%eax, %eax
+	.loc	0 2 3 epilogue_begin is_stmt 0  # main.cpp:2:3
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.Ltmp1:
+.Lfunc_end0:
+	.size	main, .Lfunc_end0-main
+	.cfi_endproc
+                                        # -- End function
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	37                              # DW_FORM_strx1
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	37                              # DW_FORM_strx1
+	.byte	17                              # DW_AT_low_pc
+	.byte	27                              # DW_FORM_addrx
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	115                             # DW_AT_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	0                               # DW_CHILDREN_no
+	.byte	17                              # DW_AT_low_pc
+	.byte	27                              # DW_FORM_addrx
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	5                               # DWARF version number
+	.byte	1                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	1                               # Abbrev [1] 0xc:0x2b DW_TAG_compile_unit
+	.byte	0                               # DW_AT_producer
+	.short	33                              # DW_AT_language
+	.byte	1                               # DW_AT_name
+	.long	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.byte	2                               # DW_AT_comp_dir
+	.byte	0                               # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.long	.Laddr_table_base0              # DW_AT_addr_base
+	.byte	2                               # Abbrev [2] 0x23:0xf DW_TAG_subprogram
+	.byte	0                               # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	86
+	.byte	3                               # DW_AT_name
+	.byte	0                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.long	50                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	3                               # Abbrev [3] 0x32:0x4 DW_TAG_base_type
+	.byte	4                               # DW_AT_name
+	.byte	5                               # DW_AT_encoding
+	.byte	4                               # DW_AT_byte_size
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str_offsets,"",@progbits
+	.long	24                              # Length of String Offsets Set
+	.short	5
+	.short	0
+.Lstr_offsets_base0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)" # string offset=0
+.Linfo_string1:
+	.asciz	"main.cpp"                      # string offset=104
+.Linfo_string2:
+	.asciz	"/home" # string offset=113
+.Linfo_string3:
+	.asciz	"main"                          # string offset=151
+.Linfo_string4:
+	.asciz	"int"                           # string offset=156
+	.section	.debug_str_offsets,"",@progbits
+	.long	.Linfo_string0
+	.long	.Linfo_string1
+	.long	.Linfo_string2
+	.long	.Linfo_string3
+	.long	.Linfo_string4
+	.section	.debug_addr,"",@progbits
+	.long	.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+	.short	5                               # DWARF version number
+	.byte	8                               # Address size
+	.byte	0                               # Segment selector size
+.Laddr_table_base0:
+	.quad	.Lfunc_begin0
+.Ldebug_addr_end0:
+	.ident	"clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/Inputs/dwarf5-types-no-cu.s b/bolt/test/X86/Inputs/dwarf5-types-no-cu.s
new file mode 100644
index 00000000000000..56fcd1542a0a53
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-types-no-cu.s
@@ -0,0 +1,281 @@
+# clang++ helper.cpp -g2 -gdwarf-5 -gno-pubnames -fdebug-types-section -S -o helperTypes.s
+# struct Foo1 {
+#   char a1;
+#   char a2;
+#   char a3;
+# };
+#
+# struct Foo2 {
+#   int b1;
+#   int b2;
+# };
+#
+# Foo1 f1;
+# Foo2 f2;
+
+# Manually removed Compile Unit .debug_info section.
+
+	.text
+	.file	"helper.cpp"
+	.file	0 "/home" "helper.cpp" md5 0xd58ef77d520bf2e6491a2e387a3501f1
+	.section	.debug_info,"G",@progbits,5391472263833448044,comdat
+.Ltu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	5                               # DWARF version number
+	.byte	2                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.quad	5391472263833448044             # Type Signature
+	.long	35                              # Type DIE Offset
+	.byte	1                               # Abbrev [1] 0x18:0x32 DW_TAG_type_unit
+	.short	33                              # DW_AT_language
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.long	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.byte	2                               # Abbrev [2] 0x23:0x22 DW_TAG_structure_type
+	.byte	5                               # DW_AT_calling_convention
+	.byte	8                               # DW_AT_name
+	.byte	3                               # DW_AT_byte_size
+	.byte	0                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.byte	3                               # Abbrev [3] 0x29:0x9 DW_TAG_member
+	.byte	4                               # DW_AT_name
+	.long	69                              # DW_AT_type
+	.byte	0                               # DW_AT_decl_file
+	.byte	2                               # DW_AT_decl_line
+	.byte	0                               # DW_AT_data_member_location
+	.byte	3                               # Abbrev [3] 0x32:0x9 DW_TAG_member
+	.byte	6                               # DW_AT_name
+	.long	69                              # DW_AT_type
+	.byte	0                               # DW_AT_decl_file
+	.byte	3                               # DW_AT_decl_line
+	.byte	1                               # DW_AT_data_member_location
+	.byte	3                               # Abbrev [3] 0x3b:0x9 DW_TAG_member
+	.byte	7                               # DW_AT_name
+	.long	69                              # DW_AT_type
+	.byte	0                               # DW_AT_decl_file
+	.byte	4                               # DW_AT_decl_line
+	.byte	2                               # DW_AT_data_member_location
+	.byte	0                               # End Of Children Mark
+	.byte	4                               # Abbrev [4] 0x45:0x4 DW_TAG_base_type
+	.byte	5                               # DW_AT_name
+	.byte	6                               # DW_AT_encoding
+	.byte	1                               # DW_AT_byte_size
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_info,"G",@progbits,5322170643381124694,comdat
+.Ltu_begin1:
+	.long	.Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+	.short	5                               # DWARF version number
+	.byte	2                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.quad	5322170643381124694             # Type Signature
+	.long	35                              # Type DIE Offset
+	.byte	1                               # Abbrev [1] 0x18:0x29 DW_TAG_type_unit
+	.short	33                              # DW_AT_language
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.long	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.byte	2                               # Abbrev [2] 0x23:0x19 DW_TAG_structure_type
+	.byte	5                               # DW_AT_calling_convention
+	.byte	13                              # DW_AT_name
+	.byte	8                               # DW_AT_byte_size
+	.byte	0                               # DW_AT_decl_file
+	.byte	7                               # DW_AT_decl_line
+	.byte	3                               # Abbrev [3] 0x29:0x9 DW_TAG_member
+	.byte	10                              # DW_AT_name
+	.long	60                              # DW_AT_type
+	.byte	0                               # DW_AT_decl_file
+	.byte	8                               # DW_AT_decl_line
+	.byte	0                               # DW_AT_data_member_location
+	.byte	3                               # Abbrev [3] 0x32:0x9 DW_TAG_member
+	.byte	12                              # DW_AT_name
+	.long	60                              # DW_AT_type
+	.byte	0                               # DW_AT_decl_file
+	.byte	9                               # DW_AT_decl_line
+	.byte	4                               # DW_AT_data_member_location
+	.byte	0                               # End Of Children Mark
+	.byte	4                               # Abbrev [4] 0x3c:0x4 DW_TAG_base_type
+	.byte	11                              # DW_AT_name
+	.byte	5                               # DW_AT_encoding
+	.byte	4                               # DW_AT_byte_size
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end1:
+	.type	f1,@object                      # @f1
+	.bss
+	.globl	f1
+f1:
+	.zero	3
+	.size	f1, 3
+
+	.type	f2,@object                      # @f2
+	.globl	f2
+	.p2align	2, 0x0
+f2:
+	.zero	8
+	.size	f2, 8
+
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	65                              # DW_TAG_type_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	19                              # DW_TAG_structure_type
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	54                              # DW_AT_calling_convention
+	.byte	11                              # DW_FORM_data1
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	13                              # DW_TAG_member
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	56                              # DW_AT_data_member_location
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	5                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	37                              # DW_FORM_strx1
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	37                              # DW_FORM_strx1
+	.byte	115                             # DW_AT_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	6                               # Abbreviation Code
+	.byte	52                              # DW_TAG_variable
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	7                               # Abbreviation Code
+	.byte	19                              # DW_TAG_structure_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	60                              # DW_AT_declaration
+	.byte	25                              # DW_FORM_flag_present
+	.byte	105                             # DW_AT_signature
+	.byte	32                              # DW_FORM_ref_sig8
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_str_offsets,"",@progbits
+	.long	60                              # Length of String Offsets Set
+	.short	5
+	.short	0
+.Lstr_offsets_base0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)" # string offset=0
+.Linfo_string1:
+	.asciz	"helper.cpp"                    # string offset=104
+.Linfo_string2:
+	.asciz	"/home" # string offset=115
+.Linfo_string3:
+	.asciz	"f1"                            # string offset=153
+.Linfo_string4:
+	.asciz	"a1"                            # string offset=156
+.Linfo_string5:
+	.asciz	"char"                          # string offset=159
+.Linfo_string6:
+	.asciz	"a2"                            # string offset=164
+.Linfo_string7:
+	.asciz	"a3"                            # string offset=167
+.Linfo_string8:
+	.asciz	"Foo1"                          # string offset=170
+.Linfo_string9:
+	.asciz	"f2"                            # string offset=175
+.Linfo_string10:
+	.asciz	"b1"                            # string offset=178
+.Linfo_string11:
+	.asciz	"int"                           # string offset=181
+.Linfo_string12:
+	.asciz	"b2"                            # string offset=185
+.Linfo_string13:
+	.asciz	"Foo2"                          # string offset=188
+	.section	.debug_str_offsets,"",@progbits
+	.long	.Linfo_string0
+	.long	.Linfo_string1
+	.long	.Linfo_string2
+	.long	.Linfo_string3
+	.long	.Linfo_string4
+	.long	.Linfo_string5
+	.long	.Linfo_string6
+	.long	.Linfo_string7
+	.long	.Linfo_string8
+	.long	.Linfo_string9
+	.long	.Linfo_string10
+	.long	.Linfo_string11
+	.long	.Linfo_string12
+	.long	.Linfo_string13
+	.section	.debug_addr,"",@progbits
+	.long	.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+	.short	5                               # DWARF version number
+	.byte	8                               # Address size
+	.byte	0                               # Segment selector size
+.Laddr_table_base0:
+	.quad	f1
+	.quad	f2
+.Ldebug_addr_end0:
+	.ident	"clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
new file mode 100644
index 00000000000000..385c7033720cdb
--- /dev/null
+++ b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
@@ -0,0 +1,58 @@
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-basic-cu.s -o %tmain.o
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-types-no-cu.s -o %thelper.o
+# RUN: %clang %cflags -dwarf-5 %tmain.o %thelper.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe > %t.txt
+# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt >> %t.txt
+# RUN: cat %t.txt | FileCheck --check-prefix=CHECK %s
+
+## This test checks we correclty re-renerate .debug_str_offsets when there are type units that have an offset not shared with CU.
+
+# CHECK: .debug_str_offsets contents
+# CHECK-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
+# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# CHECK-NEXT:  "main.cpp"
+# CHECK-NEXT:  "/home"
+# CHECK-NEXT:  "main"
+# CHECK-NEXT:  "int"
+# CHECK-NEXT: Contribution size = 60, Format = DWARF32, Version = 5
+# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# CHECK-NEXT:  "helper.cpp"
+# CHECK-NEXT:  "/home"
+# CHECK-NEXT:  "f1"
+# CHECK-NEXT:  "a1"
+# CHECK-NEXT:  "char"
+# CHECK-NEXT:  "a2"
+# CHECK-NEXT:  "a3"
+# CHECK-NEXT:  "Foo1"
+# CHECK-NEXT:  "f2"
+# CHECK-NEXT:  "b1"
+# CHECK-NEXT:  "int"
+# CHECK-NEXT:  "b2"
+# CHECK-NEXT:  "Foo2"
+
+## Checking post bolt
+# CHECK: .debug_str_offsets contents
+# CHECK-NEXT: Contribution size = 60, Format = DWARF32, Version = 5
+# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# CHECK-NEXT:  "helper.cpp"
+# CHECK-NEXT:  "/home"
+# CHECK-NEXT:  "f1"
+# CHECK-NEXT:  "a1"
+# CHECK-NEXT:  "char"
+# CHECK-NEXT:  "a2"
+# CHECK-NEXT:  "a3"
+# CHECK-NEXT:  "Foo1"
+# CHECK-NEXT:  "f2"
+# CHECK-NEXT:  "b1"
+# CHECK-NEXT:  "int"
+# CHECK-NEXT:  "b2"
+# CHECK-NEXT:  "Foo2"
+# CHECK-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
+# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# CHECK-NEXT:  "main.cpp"
+# CHECK-NEXT:  "/home"
+# CHECK-NEXT:  "main"
+# CHECK-NEXT:  "int"

>From af8cb43760714bc9cb1d390346fcc1ddbf1e7ff2 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 14 Dec 2023 12:58:02 -0800
Subject: [PATCH 2/3] removed dwarf5 flag

---
 bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
index 385c7033720cdb..12a1467aaaf2a6 100644
--- a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
+++ b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
@@ -2,7 +2,7 @@
 
 # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-basic-cu.s -o %tmain.o
 # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-types-no-cu.s -o %thelper.o
-# RUN: %clang %cflags -dwarf-5 %tmain.o %thelper.o -o %t.exe -Wl,-q
+# RUN: %clang %cflags %tmain.o %thelper.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
 # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe > %t.txt
 # RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt >> %t.txt

>From 3f8da9acff0d57f1d58b7fbcda95987768bd998f Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 14 Dec 2023 16:53:58 -0800
Subject: [PATCH 3/3] Addressed comments

---
 bolt/lib/Core/DebugData.cpp                   |  4 +-
 bolt/test/X86/Inputs/dwarf5-basic-cu.s        |  4 +
 ...arf5-type-unit-no-cu-str-offset-table.test | 95 +++++++++----------
 3 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index 57547791552bab..415b0310b6bac8 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -854,8 +854,8 @@ std::string SimpleBinaryPatcher::patchBinary(StringRef BinaryContents) {
 void DebugStrOffsetsWriter::initialize(DWARFUnit &Unit) {
   if (Unit.getVersion() < 5)
     return;
-  const DWARFSection StrOffsetsSection = Unit.getStringOffsetSection();
-  const std::optional<StrOffsetsContributionDescriptor> Contr =
+  const DWARFSection &StrOffsetsSection = Unit.getStringOffsetSection();
+  const std::optional<StrOffsetsContributionDescriptor> &Contr =
       Unit.getStringOffsetsTableContribution();
   if (!Contr)
     return;
diff --git a/bolt/test/X86/Inputs/dwarf5-basic-cu.s b/bolt/test/X86/Inputs/dwarf5-basic-cu.s
index 9a7dcadbdf49eb..a57803ea05ebc5 100644
--- a/bolt/test/X86/Inputs/dwarf5-basic-cu.s
+++ b/bolt/test/X86/Inputs/dwarf5-basic-cu.s
@@ -1,3 +1,7 @@
+# clang++ main.cpp   -g2 -gdwarf-5 -gno-pubnames -fdebug-types-section -S
+# int main() {
+#   return 0;
+# }
 	.text
 	.file	"main.cpp"
 	.globl	main                            # -- Begin function main
diff --git a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
index 12a1467aaaf2a6..21ced6ce687b5c 100644
--- a/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
+++ b/bolt/test/X86/dwarf5-type-unit-no-cu-str-offset-table.test
@@ -4,55 +4,54 @@
 # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-types-no-cu.s -o %thelper.o
 # RUN: %clang %cflags %tmain.o %thelper.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
-# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe > %t.txt
-# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt >> %t.txt
-# RUN: cat %t.txt | FileCheck --check-prefix=CHECK %s
+# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe | FileCheck -check-prefix=PRE-BOLT %s
+# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt | FileCheck -check-prefix=POST-BOLT %s
 
-## This test checks we correclty re-renerate .debug_str_offsets when there are type units that have an offset not shared with CU.
+## This test checks we correctly re-generate .debug_str_offsets when there are type units that have an offset not shared with CU.
 
-# CHECK: .debug_str_offsets contents
-# CHECK-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
-# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
-# CHECK-NEXT:  "main.cpp"
-# CHECK-NEXT:  "/home"
-# CHECK-NEXT:  "main"
-# CHECK-NEXT:  "int"
-# CHECK-NEXT: Contribution size = 60, Format = DWARF32, Version = 5
-# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
-# CHECK-NEXT:  "helper.cpp"
-# CHECK-NEXT:  "/home"
-# CHECK-NEXT:  "f1"
-# CHECK-NEXT:  "a1"
-# CHECK-NEXT:  "char"
-# CHECK-NEXT:  "a2"
-# CHECK-NEXT:  "a3"
-# CHECK-NEXT:  "Foo1"
-# CHECK-NEXT:  "f2"
-# CHECK-NEXT:  "b1"
-# CHECK-NEXT:  "int"
-# CHECK-NEXT:  "b2"
-# CHECK-NEXT:  "Foo2"
+# PRE-BOLT: .debug_str_offsets contents
+# PRE-BOLT-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
+# PRE-BOLT-NEXT:  "clang version 18.0.0git (git@github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# PRE-BOLT-NEXT:  "main.cpp"
+# PRE-BOLT-NEXT:  "/home"
+# PRE-BOLT-NEXT:  "main"
+# PRE-BOLT-NEXT:  "int"
+# PRE-BOLT-NEXT: Contribution size = 60, Format = DWARF32, Version = 5
+# PRE-BOLT-NEXT:  "clang version 18.0.0git (git@github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# PRE-BOLT-NEXT:  "helper.cpp"
+# PRE-BOLT-NEXT:  "/home"
+# PRE-BOLT-NEXT:  "f1"
+# PRE-BOLT-NEXT:  "a1"
+# PRE-BOLT-NEXT:  "char"
+# PRE-BOLT-NEXT:  "a2"
+# PRE-BOLT-NEXT:  "a3"
+# PRE-BOLT-NEXT:  "Foo1"
+# PRE-BOLT-NEXT:  "f2"
+# PRE-BOLT-NEXT:  "b1"
+# PRE-BOLT-NEXT:  "int"
+# PRE-BOLT-NEXT:  "b2"
+# PRE-BOLT-NEXT:  "Foo2"
 
 ## Checking post bolt
-# CHECK: .debug_str_offsets contents
-# CHECK-NEXT: Contribution size = 60, Format = DWARF32, Version = 5
-# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
-# CHECK-NEXT:  "helper.cpp"
-# CHECK-NEXT:  "/home"
-# CHECK-NEXT:  "f1"
-# CHECK-NEXT:  "a1"
-# CHECK-NEXT:  "char"
-# CHECK-NEXT:  "a2"
-# CHECK-NEXT:  "a3"
-# CHECK-NEXT:  "Foo1"
-# CHECK-NEXT:  "f2"
-# CHECK-NEXT:  "b1"
-# CHECK-NEXT:  "int"
-# CHECK-NEXT:  "b2"
-# CHECK-NEXT:  "Foo2"
-# CHECK-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
-# CHECK-NEXT:  "clang version 18.0.0git (git at github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
-# CHECK-NEXT:  "main.cpp"
-# CHECK-NEXT:  "/home"
-# CHECK-NEXT:  "main"
-# CHECK-NEXT:  "int"
+# POST-BOLT: .debug_str_offsets contents
+# POST-BOLT-NEXT: Contribution size = 60, Format = DWARF32, Version = 5
+# POST-BOLT-NEXT:  "clang version 18.0.0git (git@github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# POST-BOLT-NEXT:  "helper.cpp"
+# POST-BOLT-NEXT:  "/home"
+# POST-BOLT-NEXT:  "f1"
+# POST-BOLT-NEXT:  "a1"
+# POST-BOLT-NEXT:  "char"
+# POST-BOLT-NEXT:  "a2"
+# POST-BOLT-NEXT:  "a3"
+# POST-BOLT-NEXT:  "Foo1"
+# POST-BOLT-NEXT:  "f2"
+# POST-BOLT-NEXT:  "b1"
+# POST-BOLT-NEXT:  "int"
+# POST-BOLT-NEXT:  "b2"
+# POST-BOLT-NEXT:  "Foo2"
+# POST-BOLT-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
+# POST-BOLT-NEXT:  "clang version 18.0.0git (git@github.com:llvm/llvm-project.git 44dc1e0baae7c4b8a02ba06dcf396d3d452aa873)"
+# POST-BOLT-NEXT:  "main.cpp"
+# POST-BOLT-NEXT:  "/home"
+# POST-BOLT-NEXT:  "main"
+# POST-BOLT-NEXT:  "int"



More information about the llvm-commits mailing list