[llvm] [BOLT] Fix debug line emission for functions in multiple compilation units (PR #151230)

Grigory Pastukhov via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 29 14:06:28 PDT 2025


https://github.com/grigorypas created https://github.com/llvm/llvm-project/pull/151230

This patch fixes a bug in BOLT's debug line emission where functions that belong to multiple compilation units (such as inline functions in header files) were not handled correctly. Previously, BOLT incorrectly assumed that a binary function could belong to only one compilation unit, leading to incomplete or incorrect debug line information.

### **Problem**

A function can appear in multiple compilation units. Common scenarios include:

*   Template-instantiated functions
*   Inline functions defined in header files included by multiple source files

BOLT would only emit debug line information for one compilation unit, losing debug information for other CUs where the function was compiled. This resulted in incomplete debugging information and could cause debuggers to fail to set breakpoints or show incorrect source locations.

### **Root Cause**

The issue was in BOLT's assumption that each binary function maps to exactly one compilation unit. However, when the same function (e.g., an inline function from a header) is compiled into multiple object files, it legitimately belongs to multiple CUs in the final binary.

>From 74747d14bade85c631b685c77004ac2a32fd0899 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 24 Jul 2025 13:50:13 -0700
Subject: [PATCH 1/3] Change DwarfUnit field to vector in BinaryFunction

---
 bolt/include/bolt/Core/BinaryFunction.h | 27 ++++++---
 bolt/lib/Core/BinaryContext.cpp         | 38 +++++++-----
 bolt/lib/Core/BinaryEmitter.cpp         | 80 ++++++++++++++-----------
 bolt/lib/Core/BinaryFunction.cpp        | 13 +++-
 4 files changed, 95 insertions(+), 63 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae580520b9110..966559e0c6fa6 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -423,8 +423,8 @@ class BinaryFunction {
   /// Original LSDA type encoding
   unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit};
 
-  /// Containing compilation unit for the function.
-  DWARFUnit *DwarfUnit{nullptr};
+  /// All compilation units this function belongs to.
+  SmallVector<DWARFUnit *, 1> DwarfUnitVec;
 
   /// Last computed hash value. Note that the value could be recomputed using
   /// different parameters by every pass.
@@ -2414,15 +2414,24 @@ class BinaryFunction {
   void
   computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const;
 
-  void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; }
+  void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitVec.push_back(Unit); }
 
-  /// Return DWARF compile unit for this function.
-  DWARFUnit *getDWARFUnit() const { return DwarfUnit; }
+  void removeDWARFUnit(DWARFUnit *Unit) {
+    auto *It = std::find(DwarfUnitVec.begin(), DwarfUnitVec.end(), Unit);
+    // If found, erase it
+    if (It != DwarfUnitVec.end()) {
+      DwarfUnitVec.erase(It);
+    }
+  }
+
+  /// Return DWARF compile units for this function.
+  const SmallVector<DWARFUnit *, 1> getDWARFUnits() const {
+    return DwarfUnitVec;
+  }
 
-  /// Return line info table for this function.
-  const DWARFDebugLine::LineTable *getDWARFLineTable() const {
-    return getDWARFUnit() ? BC.DwCtx->getLineTableForUnit(getDWARFUnit())
-                          : nullptr;
+  const DWARFDebugLine::LineTable *
+  getDWARFLineTableForUnit(DWARFUnit *Unit) const {
+    return BC.DwCtx->getLineTableForUnit(Unit);
   }
 
   /// Finalize profile for the function.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 84f1853469709..c58d99a77f8b3 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1697,22 +1697,35 @@ void BinaryContext::preprocessDebugInfo() {
 
     auto It = llvm::partition_point(
         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
-    if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
-      Function.setDWARFUnit(It->Unit);
+    if (It == AllRanges.end() || It->LowPC > FunctionAddress) {
+      continue;
+    }
+    Function.addDWARFUnit(It->Unit);
+
+    // Go forward and add all units from ranges that cover the function
+    while (++It != AllRanges.end()) {
+      if (It->LowPC <= FunctionAddress && FunctionAddress < It->HighPC) {
+        Function.addDWARFUnit(It->Unit);
+      } else {
+        break;
+      }
+    }
   }
 
   // Discover units with debug info that needs to be updated.
   for (const auto &KV : BinaryFunctions) {
     const BinaryFunction &BF = KV.second;
-    if (shouldEmit(BF) && BF.getDWARFUnit())
-      ProcessedCUs.insert(BF.getDWARFUnit());
+    if (shouldEmit(BF) && !BF.getDWARFUnits().empty())
+      for (const DWARFUnit *Unit : BF.getDWARFUnits())
+        ProcessedCUs.insert(Unit);
   }
-
   // Clear debug info for functions from units that we are not going to process.
   for (auto &KV : BinaryFunctions) {
     BinaryFunction &BF = KV.second;
-    if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
-      BF.setDWARFUnit(nullptr);
+    for (auto *Unit : BF.getDWARFUnits()) {
+      if (!ProcessedCUs.count(Unit))
+        BF.removeDWARFUnit(Unit);
+    }
   }
 
   if (opts::Verbosity >= 1) {
@@ -1912,14 +1925,9 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
   if (RowRef == DebugLineTableRowRef::NULL_ROW)
     return;
 
-  const DWARFDebugLine::LineTable *LineTable;
-  if (Function && Function->getDWARFUnit() &&
-      Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
-    LineTable = Function->getDWARFLineTable();
-  } else {
-    LineTable = DwCtx->getLineTableForUnit(
-        DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
-  }
+  const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(
+      DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
+
   assert(LineTable && "line table expected for instruction with debug info");
 
   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 7b5cd276fee89..34bda7403d259 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -177,7 +177,8 @@ class BinaryEmitter {
   /// Note that it does not automatically result in the insertion of the EOS
   /// marker in the line table program, but provides one to the DWARF generator
   /// when it needs it.
-  void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol);
+  void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol,
+                       DWARFUnit *Unit);
 
   /// Emit debug line info for unprocessed functions from CUs that include
   /// emitted functions.
@@ -436,8 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function,
     Streamer.emitELFSize(StartSymbol, SizeExpr);
   }
 
-  if (opts::UpdateDebugSections && Function.getDWARFUnit())
-    emitLineInfoEnd(Function, EndSymbol);
+  // TODO: Emit line info end for all the CUs that contain the function.
+  if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty())
+    emitLineInfoEnd(Function, EndSymbol, Function.getDWARFUnits().front());
 
   // Exception handling info for the function.
   emitLSDA(Function, FF);
@@ -486,7 +488,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
         // A symbol to be emitted before the instruction to mark its location.
         MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
 
-        if (opts::UpdateDebugSections && BF.getDWARFUnit()) {
+        if (opts::UpdateDebugSections && !BF.getDWARFUnits().empty()) {
           LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen,
                                      FirstInstr, InstrLabel);
           FirstInstr = false;
@@ -679,8 +681,10 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
 SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
                                   SMLoc PrevLoc, bool FirstInstr,
                                   MCSymbol *&InstrLabel) {
-  DWARFUnit *FunctionCU = BF.getDWARFUnit();
-  const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable();
+  // TODO: implment emitting into line tables corresponding to multiple CUs
+  DWARFUnit *FunctionCU = BF.getDWARFUnits().front();
+  const DWARFDebugLine::LineTable *FunctionLineTable =
+      BF.getDWARFLineTableForUnit(FunctionCU);
   assert(FunctionCU && "cannot emit line info for function without CU");
 
   DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);
@@ -740,13 +744,13 @@ SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
 }
 
 void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF,
-                                    MCSymbol *FunctionEndLabel) {
-  DWARFUnit *FunctionCU = BF.getDWARFUnit();
-  assert(FunctionCU && "DWARF unit expected");
+                                    MCSymbol *FunctionEndLabel,
+                                    DWARFUnit *Unit) {
+  assert(Unit && "DWARF unit expected");
   BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0);
   const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
   BC.Ctx->clearDwarfLocSeen();
-  BC.getDwarfLineTable(FunctionCU->getOffset())
+  BC.getDwarfLineTable(Unit->getOffset())
       .getMCLineSections()
       .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc),
                     Streamer.getCurrentSectionOnly());
@@ -1115,36 +1119,40 @@ void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() {
     if (Function.isEmitted())
       continue;
 
-    const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable();
-    if (!LineTable)
-      continue; // nothing to update for this function
+    // Loop through all CUs in the function
+    for (DWARFUnit *Unit : Function.getDWARFUnits()) {
+      const DWARFDebugLine::LineTable *LineTable =
+          Function.getDWARFLineTableForUnit(Unit);
+      if (!LineTable)
+        continue; // nothing to update for this unit
+
+      const uint64_t Address = Function.getAddress();
+      std::vector<uint32_t> Results;
+      if (!LineTable->lookupAddressRange(
+              {Address, object::SectionedAddress::UndefSection},
+              Function.getSize(), Results))
+        continue;
 
-    const uint64_t Address = Function.getAddress();
-    std::vector<uint32_t> Results;
-    if (!LineTable->lookupAddressRange(
-            {Address, object::SectionedAddress::UndefSection},
-            Function.getSize(), Results))
-      continue;
+      if (Results.empty())
+        continue;
 
-    if (Results.empty())
-      continue;
+      // The first row returned could be the last row matching the start
+      // address. Find the first row with the same address that is not the end
+      // of the sequence.
+      uint64_t FirstRow = Results.front();
+      while (FirstRow > 0) {
+        const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1];
+        if (PrevRow.Address.Address != Address || PrevRow.EndSequence)
+          break;
+        --FirstRow;
+      }
 
-    // The first row returned could be the last row matching the start address.
-    // Find the first row with the same address that is not the end of the
-    // sequence.
-    uint64_t FirstRow = Results.front();
-    while (FirstRow > 0) {
-      const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1];
-      if (PrevRow.Address.Address != Address || PrevRow.EndSequence)
-        break;
-      --FirstRow;
+      const uint64_t EndOfSequenceAddress =
+          Function.getAddress() + Function.getMaxSize();
+      BC.getDwarfLineTable(Unit->getOffset())
+          .addLineTableSequence(LineTable, FirstRow, Results.back(),
+                                EndOfSequenceAddress);
     }
-
-    const uint64_t EndOfSequenceAddress =
-        Function.getAddress() + Function.getMaxSize();
-    BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset())
-        .addLineTableSequence(LineTable, FirstRow, Results.back(),
-                              EndOfSequenceAddress);
   }
 
   // For units that are completely unprocessed, use original debug line contents
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..bbe04a17c0ad3 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1496,9 +1496,16 @@ Error BinaryFunction::disassemble() {
     }
 
 add_instruction:
-    if (getDWARFLineTable()) {
-      Instruction.setLoc(findDebugLineInformationForInstructionAt(
-          AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
+    // TODO: Handle multiple DWARF compilation units properly.
+    // For now, use the first unit if available.
+    if (!getDWARFUnits().empty()) {
+      DWARFUnit *FirstUnit = getDWARFUnits().front();
+      const DWARFDebugLine::LineTable *LineTable =
+          getDWARFLineTableForUnit(FirstUnit);
+      if (LineTable) {
+        Instruction.setLoc(findDebugLineInformationForInstructionAt(
+            AbsoluteInstrAddr, FirstUnit, LineTable));
+      }
     }
 
     // Record offset of the instruction for profile matching.

>From 885937d5529f3ba2b047d8658ed85aa34b515ffe Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 24 Jul 2025 20:30:10 -0700
Subject: [PATCH 2/3] Implemented multiple rows per instruction logic

---
 bolt/include/bolt/Core/BinaryContext.h   |   6 +
 bolt/include/bolt/Core/DebugData.h       | 116 +++++++++++++----
 bolt/lib/Core/BinaryContext.cpp          |  27 ++--
 bolt/lib/Core/BinaryEmitter.cpp          | 136 ++++++++++++--------
 bolt/lib/Core/BinaryFunction.cpp         |  58 ++++-----
 bolt/lib/Core/DebugData.cpp              |   2 -
 bolt/test/Inputs/multi-cu-common.h       |  10 ++
 bolt/test/Inputs/multi-cu-file1.c        |   9 ++
 bolt/test/Inputs/multi-cu-file2.c        |   8 ++
 bolt/test/Inputs/process-debug-line.sh   | 101 +++++++++++++++
 bolt/test/X86/multi-cu-debug-line.test   | 108 ++++++++++++++++
 bolt/test/perf2bolt/Inputs/perf_test.lds |  11 +-
 bolt/unittests/Core/CMakeLists.txt       |   1 +
 bolt/unittests/Core/ClusteredRows.cpp    | 152 +++++++++++++++++++++++
 14 files changed, 621 insertions(+), 124 deletions(-)
 create mode 100644 bolt/test/Inputs/multi-cu-common.h
 create mode 100644 bolt/test/Inputs/multi-cu-file1.c
 create mode 100644 bolt/test/Inputs/multi-cu-file2.c
 create mode 100755 bolt/test/Inputs/process-debug-line.sh
 create mode 100644 bolt/test/X86/multi-cu-debug-line.test
 create mode 100644 bolt/unittests/Core/ClusteredRows.cpp

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 91ecf89da618c..48bc9a5d1f92c 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -288,6 +288,12 @@ class BinaryContext {
   /// overwritten, but it is okay to re-generate debug info for them.
   std::set<const DWARFUnit *> ProcessedCUs;
 
+  /// DWARF-related container to manage lifecycle of groups of rows from line
+  /// tables associated with instructions. Since binary functions can span
+  /// multiple compilation units, instructions may reference debug line
+  /// information from multiple CUs.
+  ClasteredRowsContainer ClasteredRows;
+
   // Setup MCPlus target builder
   void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
     MIB = std::move(TargetBuilder);
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 6ea3b1af1024f..048594946d8a9 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -135,8 +135,6 @@ struct DebugLineTableRowRef {
   uint32_t DwCompileUnitIndex;
   uint32_t RowIndex;
 
-  const static DebugLineTableRowRef NULL_ROW;
-
   bool operator==(const DebugLineTableRowRef &Rhs) const {
     return DwCompileUnitIndex == Rhs.DwCompileUnitIndex &&
            RowIndex == Rhs.RowIndex;
@@ -145,24 +143,6 @@ struct DebugLineTableRowRef {
   bool operator!=(const DebugLineTableRowRef &Rhs) const {
     return !(*this == Rhs);
   }
-
-  static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) {
-    union {
-      decltype(Loc.getPointer()) Ptr;
-      DebugLineTableRowRef Ref;
-    } U;
-    U.Ptr = Loc.getPointer();
-    return U.Ref;
-  }
-
-  SMLoc toSMLoc() const {
-    union {
-      decltype(SMLoc().getPointer()) Ptr;
-      DebugLineTableRowRef Ref;
-    } U;
-    U.Ref = *this;
-    return SMLoc::getFromPointer(U.Ptr);
-  }
 };
 
 /// Common buffer vector used for debug info handling.
@@ -210,7 +190,7 @@ class DebugRangesSectionWriter {
   static bool classof(const DebugRangesSectionWriter *Writer) {
     return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
   }
-  
+
   /// Append a range to the main buffer.
   void appendToRangeBuffer(const DebugBufferVector &CUBuffer);
 
@@ -852,6 +832,100 @@ class DwarfLineTable {
   // Returns DWARF Version for this line table.
   uint16_t getDwarfVersion() const { return DwarfVersion; }
 };
+
+/// ClusteredRows represents a collection of debug line table row references.
+/// Since a binary function can belong to multiple compilation units (CUs),
+/// a single MCInst can have multiple debug line table rows associated with it
+/// from different CUs. This class manages such clustered row references.
+///
+/// MEMORY LAYOUT AND DESIGN:
+/// This class uses a flexible array member pattern to store all
+/// DebugLineTableRowRef elements in a single contiguous memory allocation.
+/// The memory layout is:
+///
+/// +------------------+
+/// | ClusteredRows    |  <- Object header (Size + first element)
+/// | - Size           |
+/// | - Raws (element) |  <- First DebugLineTableRowRef element
+/// +------------------+
+/// | element[1]       |  <- Additional DebugLineTableRowRef elements
+/// | element[2]       |     stored immediately after the object
+/// | ...              |
+/// | element[Size-1]  |
+/// +------------------+
+///
+/// PERFORMANCE BENEFITS:
+/// - Single memory allocation: All elements are stored in one contiguous block,
+///   eliminating the need for separate heap allocations for the array.
+/// - No extra dereferencing: Elements are accessed directly via pointer
+///   arithmetic (beginPtr() + offset) rather than through an additional
+///   pointer indirection.
+/// - Cache locality: All elements are guaranteed to be adjacent in memory,
+///   improving cache performance during iteration.
+/// - Memory efficiency: No overhead from separate pointer storage or
+///   fragmented allocations.
+///
+/// The 'Raws' member serves as both the first element storage and the base
+/// address for pointer arithmetic to access subsequent elements.
+class ClusteredRows {
+public:
+  ArrayRef<DebugLineTableRowRef> getRows() const {
+    return ArrayRef<DebugLineTableRowRef>(beginPtrConst(), Size);
+  }
+  uint64_t size() const { return Size; }
+  static const ClusteredRows *fromSMLoc(const SMLoc &Loc) {
+    return reinterpret_cast<const ClusteredRows *>(Loc.getPointer());
+  }
+  SMLoc toSMLoc() const {
+    return SMLoc::getFromPointer(reinterpret_cast<const char *>(this));
+  }
+
+  template <typename T> void populate(const T Vec) {
+    assert(Vec.size() == Size && "");
+    DebugLineTableRowRef *CurRawPtr = beginPtr();
+    for (DebugLineTableRowRef RowRef : Vec) {
+      *CurRawPtr = RowRef;
+      ++CurRawPtr;
+    }
+  }
+
+private:
+  uint64_t Size;
+  DebugLineTableRowRef Raws;
+
+  ClusteredRows(uint64_t Size) : Size(Size) {}
+  static uint64_t getTotalSize(uint64_t Size) {
+    assert(Size > 0 && "Size must be greater than 0");
+    return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef);
+  }
+  const DebugLineTableRowRef *beginPtrConst() const {
+    return reinterpret_cast<const DebugLineTableRowRef *>(&Raws);
+  }
+  DebugLineTableRowRef *beginPtr() {
+    return reinterpret_cast<DebugLineTableRowRef *>(&Raws);
+  }
+
+  friend class ClasteredRowsContainer;
+};
+
+/// ClasteredRowsContainer manages the lifecycle of ClusteredRows objects.
+class ClasteredRowsContainer {
+public:
+  ClusteredRows *createClusteredRows(uint64_t Size) {
+    auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size)))
+        ClusteredRows(Size);
+    Clusters.push_back(CR);
+    return CR;
+  }
+  ~ClasteredRowsContainer() {
+    for (auto *CR : Clusters)
+      std::free(CR);
+  }
+
+private:
+  std::vector<ClusteredRows *> Clusters;
+};
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index c58d99a77f8b3..1766b0540a5cd 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1568,23 +1568,21 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
   const DWARFDebugLine::LineTable *LineTable =
       DwCtx->getLineTableForUnit(SrcUnit);
-  const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
-      LineTable->Prologue.FileNames;
+  const DWARFDebugLine::FileNameEntry &FileNameEntry =
+      LineTable->Prologue.getFileNameEntry(FileIndex);
   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
   // means empty dir.
-  assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
-         "FileIndex out of range for the compilation unit.");
   StringRef Dir = "";
-  if (FileNames[FileIndex - 1].DirIdx != 0) {
+  if (FileNameEntry.DirIdx != 0) {
     if (std::optional<const char *> DirName = dwarf::toString(
             LineTable->Prologue
-                .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
+                .IncludeDirectories[FileNameEntry.DirIdx - 1])) {
       Dir = *DirName;
     }
   }
   StringRef FileName = "";
   if (std::optional<const char *> FName =
-          dwarf::toString(FileNames[FileIndex - 1].Name))
+          dwarf::toString(FileNameEntry.Name))
     FileName = *FName;
   assert(FileName != "");
   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
@@ -1920,20 +1918,25 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
                            const BinaryFunction *Function,
                            DWARFContext *DwCtx) {
-  DebugLineTableRowRef RowRef =
-      DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
-  if (RowRef == DebugLineTableRowRef::NULL_ROW)
+  const ClusteredRows *LineTableRows =
+      ClusteredRows::fromSMLoc(Instruction.getLoc());
+  if (LineTableRows == nullptr)
     return;
 
+  // File name and line number should be the same for all CUs.
+  // So it is sufficient to check the first one.
+  DebugLineTableRowRef RowRef = LineTableRows->getRows().front();
   const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(
       DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
 
-  assert(LineTable && "line table expected for instruction with debug info");
+  if (!LineTable)
+    return;
 
   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
   StringRef FileName = "";
+
   if (std::optional<const char *> FName =
-          dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
+          dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name))
     FileName = *FName;
   OS << " # debug line " << FileName << ":" << Row.Line;
   if (Row.Column)
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 34bda7403d259..8862f0680cb7e 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -437,9 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function,
     Streamer.emitELFSize(StartSymbol, SizeExpr);
   }
 
-  // TODO: Emit line info end for all the CUs that contain the function.
   if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty())
-    emitLineInfoEnd(Function, EndSymbol, Function.getDWARFUnits().front());
+    for (DWARFUnit *Unit : Function.getDWARFUnits())
+      emitLineInfoEnd(Function, EndSymbol, Unit);
 
   // Exception handling info for the function.
   emitLSDA(Function, FF);
@@ -681,64 +681,92 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
 SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
                                   SMLoc PrevLoc, bool FirstInstr,
                                   MCSymbol *&InstrLabel) {
-  // TODO: implment emitting into line tables corresponding to multiple CUs
-  DWARFUnit *FunctionCU = BF.getDWARFUnits().front();
-  const DWARFDebugLine::LineTable *FunctionLineTable =
-      BF.getDWARFLineTableForUnit(FunctionCU);
-  assert(FunctionCU && "cannot emit line info for function without CU");
-
-  DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);
-
-  // Check if no new line info needs to be emitted.
-  if (RowReference == DebugLineTableRowRef::NULL_ROW ||
+  if (NewLoc.getPointer() == nullptr ||
       NewLoc.getPointer() == PrevLoc.getPointer())
     return PrevLoc;
+  const ClusteredRows *Cluster = ClusteredRows::fromSMLoc(NewLoc);
+
+  auto addToLineTable = [&](DebugLineTableRowRef RowReference,
+                            const DWARFUnit *TargetCU, unsigned Flags,
+                            MCSymbol *InstrLabel,
+                            const DWARFDebugLine::Row &CurrentRow) {
+    const uint64_t TargetUnitIndex = TargetCU->getOffset();
+    unsigned TargetFilenum = CurrentRow.File;
+    const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex;
+    // If the CU id from the current instruction location does not
+    // match the target CU id, it means that we have come across some
+    // inlined code (by BOLT).  We must look up the CU for the instruction's
+    // original function and get the line table from that.
+    if (TargetUnitIndex != CurrentUnitIndex) {
+      // Add filename from the inlined function to the current CU.
+      TargetFilenum = BC.addDebugFilenameToUnit(
+          TargetUnitIndex, CurrentUnitIndex, CurrentRow.File);
+    }
+    BC.Ctx->setCurrentDwarfLoc(TargetFilenum, CurrentRow.Line,
+                               CurrentRow.Column, Flags, CurrentRow.Isa,
+                               CurrentRow.Discriminator);
+    const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
+    BC.Ctx->clearDwarfLocSeen();
+    auto &MapLineEntries = BC.getDwarfLineTable(TargetUnitIndex)
+                               .getMCLineSections()
+                               .getMCLineEntries();
+    const auto *It = MapLineEntries.find(Streamer.getCurrentSectionOnly());
+    auto NewLineEntry = MCDwarfLineEntry(InstrLabel, DwarfLoc);
+
+    // Check if line table exists and has entries before doing comparison
+    if (It != MapLineEntries.end() && !It->second.empty()) {
+      // Check if the new line entry has the same debug info as the last one
+      // to avoid duplicates. We don't compare labels since different
+      // instructions can have the same line info.
+      const auto &LastEntry = It->second.back();
+      if (LastEntry.getFileNum() == NewLineEntry.getFileNum() &&
+          LastEntry.getLine() == NewLineEntry.getLine() &&
+          LastEntry.getColumn() == NewLineEntry.getColumn() &&
+          LastEntry.getFlags() == NewLineEntry.getFlags() &&
+          LastEntry.getIsa() == NewLineEntry.getIsa() &&
+          LastEntry.getDiscriminator() == NewLineEntry.getDiscriminator())
+        return;
+    }
 
-  unsigned CurrentFilenum = 0;
-  const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable;
-
-  // If the CU id from the current instruction location does not
-  // match the CU id from the current function, it means that we
-  // have come across some inlined code.  We must look up the CU
-  // for the instruction's original function and get the line table
-  // from that.
-  const uint64_t FunctionUnitIndex = FunctionCU->getOffset();
-  const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex;
-  if (CurrentUnitIndex != FunctionUnitIndex) {
-    CurrentLineTable = BC.DwCtx->getLineTableForUnit(
-        BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex));
-    // Add filename from the inlined function to the current CU.
-    CurrentFilenum = BC.addDebugFilenameToUnit(
-        FunctionUnitIndex, CurrentUnitIndex,
-        CurrentLineTable->Rows[RowReference.RowIndex - 1].File);
-  }
-
-  const DWARFDebugLine::Row &CurrentRow =
-      CurrentLineTable->Rows[RowReference.RowIndex - 1];
-  if (!CurrentFilenum)
-    CurrentFilenum = CurrentRow.File;
-
-  unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) |
-                   (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) |
-                   (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) |
-                   (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin);
-
-  // Always emit is_stmt at the beginning of function fragment.
-  if (FirstInstr)
-    Flags |= DWARF2_FLAG_IS_STMT;
-
-  BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column,
-                             Flags, CurrentRow.Isa, CurrentRow.Discriminator);
-  const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
-  BC.Ctx->clearDwarfLocSeen();
+    BC.getDwarfLineTable(TargetUnitIndex)
+        .getMCLineSections()
+        .addLineEntry(NewLineEntry, Streamer.getCurrentSectionOnly());
+  };
 
   if (!InstrLabel)
     InstrLabel = BC.Ctx->createTempSymbol();
-
-  BC.getDwarfLineTable(FunctionUnitIndex)
-      .getMCLineSections()
-      .addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc),
-                    Streamer.getCurrentSectionOnly());
+  for (DebugLineTableRowRef RowReference : Cluster->getRows()) {
+    const DWARFDebugLine::LineTable *CurrentLineTable =
+        BC.DwCtx->getLineTableForUnit(
+            BC.DwCtx->getCompileUnitForOffset(RowReference.DwCompileUnitIndex));
+    const DWARFDebugLine::Row &CurrentRow =
+        CurrentLineTable->Rows[RowReference.RowIndex - 1];
+    unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) |
+                     (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) |
+                     (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) |
+                     (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin);
+
+    // Always emit is_stmt at the beginning of function fragment.
+    if (FirstInstr)
+      Flags |= DWARF2_FLAG_IS_STMT;
+    const auto &FunctionDwarfUnits = BF.getDWARFUnits();
+    const auto *It = std::find_if(
+        FunctionDwarfUnits.begin(), FunctionDwarfUnits.end(),
+        [RowReference](const DWARFUnit *Unit) {
+          return Unit->getOffset() == RowReference.DwCompileUnitIndex;
+        });
+    if (It != FunctionDwarfUnits.end()) {
+      addToLineTable(RowReference, *It, Flags, InstrLabel, CurrentRow);
+      continue;
+    }
+    // This row is from a CU that did not contain the original function.
+    // This might happen if BOLT moved/inlined that instruction from other CUs.
+    // In this case, we need to insert it into all CUs that the function
+    // originally belonged to.
+    for (const DWARFUnit *Unit : BF.getDWARFUnits()) {
+      addToLineTable(RowReference, Unit, Flags, InstrLabel, CurrentRow);
+    }
+  }
 
   return NewLoc;
 }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index bbe04a17c0ad3..a3a6b31451441 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -179,37 +179,29 @@ template <typename R> static bool emptyRange(const R &Range) {
 }
 
 /// Gets debug line information for the instruction located at the given
-/// address in the original binary. The SMLoc's pointer is used
-/// to point to this information, which is represented by a
-/// DebugLineTableRowRef. The returned pointer is null if no debug line
-/// information for this instruction was found.
-static SMLoc findDebugLineInformationForInstructionAt(
+/// address in the original binary. Returns an optional DebugLineTableRowRef
+/// that references the corresponding row in the DWARF line table. Since binary
+/// functions can span multiple compilation units, this function helps
+/// associate instructions with their debug line information from the
+/// appropriate CU. Returns std::nullopt if no debug line information for
+/// this instruction was found.
+static std::optional<DebugLineTableRowRef>
+findDebugLineInformationForInstructionAt(
     uint64_t Address, DWARFUnit *Unit,
     const DWARFDebugLine::LineTable *LineTable) {
-  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
-  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
-  // the pointer itself.
-  static_assert(
-      sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
-      "Cannot fit instruction debug line information into SMLoc's pointer");
-
-  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
   uint32_t RowIndex = LineTable->lookupAddress(
       {Address, object::SectionedAddress::UndefSection});
   if (RowIndex == LineTable->UnknownRowIndex)
-    return NullResult;
+    return std::nullopt;
 
   assert(RowIndex < LineTable->Rows.size() &&
          "Line Table lookup returned invalid index.");
 
-  decltype(SMLoc().getPointer()) Ptr;
-  DebugLineTableRowRef *InstructionLocation =
-      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
-
-  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
-  InstructionLocation->RowIndex = RowIndex + 1;
+  DebugLineTableRowRef InstructionLocation;
+  InstructionLocation.DwCompileUnitIndex = Unit->getOffset();
+  InstructionLocation.RowIndex = RowIndex + 1;
 
-  return SMLoc::getFromPointer(Ptr);
+  return InstructionLocation;
 }
 
 static std::string buildSectionName(StringRef Prefix, StringRef Name,
@@ -1496,15 +1488,23 @@ Error BinaryFunction::disassemble() {
     }
 
 add_instruction:
-    // TODO: Handle multiple DWARF compilation units properly.
-    // For now, use the first unit if available.
     if (!getDWARFUnits().empty()) {
-      DWARFUnit *FirstUnit = getDWARFUnits().front();
-      const DWARFDebugLine::LineTable *LineTable =
-          getDWARFLineTableForUnit(FirstUnit);
-      if (LineTable) {
-        Instruction.setLoc(findDebugLineInformationForInstructionAt(
-            AbsoluteInstrAddr, FirstUnit, LineTable));
+      SmallVector<DebugLineTableRowRef, 1> Rows;
+      for (DWARFUnit *Unit : getDWARFUnits()) {
+        const DWARFDebugLine::LineTable *LineTable =
+            getDWARFLineTableForUnit(Unit);
+        if (!LineTable)
+          continue;
+        if (std::optional<DebugLineTableRowRef> RowRef =
+                findDebugLineInformationForInstructionAt(AbsoluteInstrAddr,
+                                                         Unit, LineTable))
+          Rows.emplace_back(*RowRef);
+      }
+      if (!Rows.empty()) {
+        ClusteredRows *Cluster =
+            BC.ClasteredRows.createClusteredRows(Rows.size());
+        Cluster->populate(Rows);
+        Instruction.setLoc(Cluster->toSMLoc());
       }
     }
 
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index 521eb8d91bbc0..e05f28f08572c 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -101,8 +101,6 @@ std::optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
   return findAttributeInfo(DIE, AbbrevDecl, *Index);
 }
 
-const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0};
-
 LLVM_ATTRIBUTE_UNUSED
 static void printLE64(const std::string &S) {
   for (uint32_t I = 0, Size = S.size(); I < Size; ++I) {
diff --git a/bolt/test/Inputs/multi-cu-common.h b/bolt/test/Inputs/multi-cu-common.h
new file mode 100644
index 0000000000000..aeb8076305dce
--- /dev/null
+++ b/bolt/test/Inputs/multi-cu-common.h
@@ -0,0 +1,10 @@
+#ifndef MULTI_CU_COMMON_H
+#define MULTI_CU_COMMON_H
+
+static inline int common_inline_function(int x) {
+  int result = x * 2;
+  result += 10;
+  return result;
+}
+
+#endif // MULTI_CU_COMMON_H
diff --git a/bolt/test/Inputs/multi-cu-file1.c b/bolt/test/Inputs/multi-cu-file1.c
new file mode 100644
index 0000000000000..f3528b2acddb8
--- /dev/null
+++ b/bolt/test/Inputs/multi-cu-file1.c
@@ -0,0 +1,9 @@
+#include "multi-cu-common.h"
+#include <stdio.h>
+
+int main() {
+  int value = 5;
+  int result = common_inline_function(value);
+  printf("File1: Result is %d\n", result);
+  return 0;
+}
diff --git a/bolt/test/Inputs/multi-cu-file2.c b/bolt/test/Inputs/multi-cu-file2.c
new file mode 100644
index 0000000000000..f33af72595afe
--- /dev/null
+++ b/bolt/test/Inputs/multi-cu-file2.c
@@ -0,0 +1,8 @@
+#include "multi-cu-common.h"
+#include <stdio.h>
+
+void helper_function() {
+  int value = 10;
+  int result = common_inline_function(value);
+  printf("File2: Helper result is %d\n", result);
+}
diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/Inputs/process-debug-line.sh
new file mode 100755
index 0000000000000..b30408df922eb
--- /dev/null
+++ b/bolt/test/Inputs/process-debug-line.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+# Script to process llvm-dwarfdump --debug-line output and create a normalized table
+# Usage: process-debug-line.sh <debug-line.txt>
+#
+# Output format: CU_FILE LINE COLUMN FILE_NAME [additional_info]
+# This strips addresses to make rows unique and adds context about which CU and file each line belongs to
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <debug-line.txt>" >&2
+    exit 1
+fi
+
+debug_line_file="$1"
+
+if [ ! -f "$debug_line_file" ]; then
+    echo "Error: File '$debug_line_file' not found" >&2
+    exit 1
+fi
+
+awk '
+BEGIN {
+    cu_count = 0
+    current_cu_file = ""
+    # Initialize file names array
+    for (i = 0; i < 100; i++) current_file_names[i] = ""
+}
+
+# Track debug_line sections (new CU)
+/^debug_line\[/ {
+    cu_count++
+    current_cu_file = ""
+    # Clear file names array for new CU
+    for (i = 0; i < 100; i++) current_file_names[i] = ""
+    next
+}
+
+# Capture file names and their indices
+/^file_names\[.*\]:/ {
+    # Extract file index (three-argument match() is a gawk extension)
+    if (match($0, /file_names\[[[:space:]]*([0-9]+)\]:/, arr)) {
+        file_index = arr[1]
+    } else {
+        # Fallback parsing
+        gsub(/file_names\[/, "", $0)
+        gsub(/\]:.*/, "", $0)
+        gsub(/[[:space:]]/, "", $0)
+        file_index = $0
+    }
+
+    getline  # Read the next line which contains the actual filename
+    if (match($0, /name:[[:space:]]*"([^"]*)"/, name_arr)) {
+        filename = name_arr[1]
+        current_file_names[file_index] = filename
+
+        # Extract basename for main CU file (first .c/.cpp/.cc file we see)
+        if (current_cu_file == "" && match(filename, /([^\/]*\.(c|cpp|cc))$/, cu_arr)) {
+            current_cu_file = cu_arr[1]
+        }
+    }
+    next
+}
+
+# Process line table entries
+/^0x[0-9a-f]+/ {
+    # Parse the line entry: Address Line Column File ISA Discriminator OpIndex Flags
+    if (NF >= 4) {
+        line = $2
+        column = $3
+        file_index = $4
+
+        # Get the filename for this file index
+        filename = current_file_names[file_index]
+        if (filename == "") {
+            filename = "UNKNOWN_FILE_" file_index
+        } else {
+            # Extract just the basename (three-argument match() needs gawk)
+            if (match(filename, /([^\/]*)$/, basename_arr)) {
+                filename = basename_arr[1]
+            } else {
+                # Fallback: use gsub
+                gsub(/.*\//, "", filename)
+            }
+        }
+
+        # Build additional info (flags, etc.)
+        additional_info = ""
+        for (i = 8; i <= NF; i++) {
+            if (additional_info != "") additional_info = additional_info " "
+            additional_info = additional_info $i
+        }
+
+        # Output normalized row: CU_FILE LINE COLUMN FILE_NAME [additional_info]
+        printf "%s %s %s %s", current_cu_file, line, column, filename
+        if (additional_info != "") {
+            printf " %s", additional_info
+        }
+        printf "\n"
+    }
+}
+' "$debug_line_file"
diff --git a/bolt/test/X86/multi-cu-debug-line.test b/bolt/test/X86/multi-cu-debug-line.test
new file mode 100644
index 0000000000000..430b281445db5
--- /dev/null
+++ b/bolt/test/X86/multi-cu-debug-line.test
@@ -0,0 +1,108 @@
+## Test that BOLT correctly handles debug line information for functions
+## that belong to multiple compilation units (e.g., inline functions in
+## common header files). The test covers two scenarios:
+## 1. Normal processing: .debug_line section shows lines for the function 
+##    in all CUs where it was compiled, with no duplicate rows within CUs
+## 2. Functions not processed: When BOLT doesn't process functions (using 
+##    --funcs with nonexistent function), original debug info is preserved
+
+# REQUIRES: system-linux
+
+## Compile test files with debug info
+# RUN: %clang %cflags -O0 -g %S/../Inputs/multi-cu-file1.c %S/../Inputs/multi-cu-file2.c \
+# RUN:   -I%S/../Inputs -o %t.exe -Wl,-q
+
+## Test 1: Normal BOLT processing (functions are processed/optimized)
+# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --debug-line %t.bolt > %t.debug-line.txt
+# RUN: FileCheck %s --check-prefix=BASIC --input-file %t.debug-line.txt
+
+## Check that debug line information is present for both compilation units
+# BASIC: debug_line[{{.*}}]
+# BASIC: file_names[{{.*}}]:
+# BASIC: name: "{{.*}}multi-cu-file1.c"
+# BASIC: debug_line[{{.*}}]
+# BASIC: file_names[{{.*}}]:
+# BASIC: name: "{{.*}}multi-cu-file2.c"
+
+## Use our helper script to create a normalized table without addresses
+# RUN: %S/../Inputs/process-debug-line.sh %t.debug-line.txt > %t.normalized-debug-line.txt
+# RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt
+
+## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h
+## in both compilation units
+# NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h
+
+## Verify that we have line entries for the inline function in multiple CUs
+## by checking that the header file appears multiple times in different contexts
+# RUN: grep -c "multi-cu-common.h" %t.debug-line.txt > %t.header-count.txt
+# RUN: FileCheck %s --check-prefix=MULTI-CU --input-file %t.header-count.txt
+
+## The header should appear in debug line info for multiple CUs
+# MULTI-CU: {{[2-9]|[1-9][0-9]+}}
+
+## Check that there are no duplicate line table rows within the same CU
+## This verifies the fix for the bug where duplicate entries were created
+# RUN: sort %t.normalized-debug-line.txt | uniq -c | \
+# RUN:   awk '$1 > 1 {print "DUPLICATE_ROW: " $0}' > %t.duplicates.txt
+# RUN: FileCheck %s --check-prefix=NO-DUPLICATES --input-file %t.duplicates.txt --allow-empty
+
+## Should have no duplicate normalized rows (file should be empty)
+## Note: Cross-CU duplicates are expected and valid (same function in different CUs)
+## but within-CU duplicates would indicate a bug
+# NO-DUPLICATES-NOT: DUPLICATE_ROW
+
+## Test 2: Functions not processed by BOLT (using --funcs with nonexistent function)
+## This tests the code path where BOLT preserves original debug info
+# RUN: llvm-bolt %t.exe -o %t.not-emitted.bolt --update-debug-sections --funcs=nonexistent_function
+# RUN: llvm-dwarfdump --debug-line %t.not-emitted.bolt > %t.not-emitted.debug-line.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-BASIC --input-file %t.not-emitted.debug-line.txt
+
+## Check that debug line information is still present for both compilation units when functions aren't processed
+# PRESERVED-BASIC: debug_line[{{.*}}]
+# PRESERVED-BASIC: file_names[{{.*}}]:
+# PRESERVED-BASIC: name: "{{.*}}multi-cu-file1.c"
+# PRESERVED-BASIC: debug_line[{{.*}}]
+# PRESERVED-BASIC: file_names[{{.*}}]:
+# PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c"
+
+## Create normalized output for the not-emitted case
+# RUN: %S/../Inputs/process-debug-line.sh %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt
+
+## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h
+## in both compilation units (preserved from original)
+# PRESERVED-NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h
+
+## Verify that we have line entries for the inline function in multiple CUs (preserved)
+## by checking that the header file appears multiple times in different contexts
+# RUN: grep -c "multi-cu-common.h" %t.not-emitted.debug-line.txt > %t.preserved-header-count.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-MULTI-CU --input-file %t.preserved-header-count.txt
+
+## The header should appear in debug line info for multiple CUs (preserved from original)
+# PRESERVED-MULTI-CU: {{[2-9]|[1-9][0-9]+}}
+
+## Check that original debug info is preserved for main functions
+# RUN: grep "multi-cu-file1.c.*multi-cu-file1.c" %t.not-emitted.normalized.txt > %t.preserved-main.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-MAIN --input-file %t.preserved-main.txt
+
+# PRESERVED-MAIN: multi-cu-file1.c {{[0-9]+}} {{[0-9]+}} multi-cu-file1.c
+
+## Check that original debug info is preserved for file2 functions
+# RUN: grep "multi-cu-file2.c.*multi-cu-file2.c" %t.not-emitted.normalized.txt > %t.preserved-file2.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-FILE2 --input-file %t.preserved-file2.txt
+
+# PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c
+
+## Note: We do not check for duplicates in Test 2 since we are preserving original debug info as-is
+## and the original may contain patterns that would be flagged as duplicates by our normalization
\ No newline at end of file
diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds
index 66d925a05bebc..c2704d73a638c 100644
--- a/bolt/test/perf2bolt/Inputs/perf_test.lds
+++ b/bolt/test/perf2bolt/Inputs/perf_test.lds
@@ -1,13 +1,12 @@
 SECTIONS {
-  . = SIZEOF_HEADERS;
+  . = 0x400000 + SIZEOF_HEADERS;
   .interp : { *(.interp) }
   .note.gnu.build-id : { *(.note.gnu.build-id) }
-  . = 0x212e8;
   .dynsym         : { *(.dynsym) }
-  . = 0x31860;
+  . = 0x801000;
   .text : { *(.text*) }
-  . = 0x41c20;
+  . = 0x803000;
   .fini_array : { *(.fini_array) }
-  . = 0x54e18;
+  . = 0x805000;
   .data : { *(.data) }
-}
\ No newline at end of file
+}
diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt
index 54e8ea10cda12..538add9baa798 100644
--- a/bolt/unittests/Core/CMakeLists.txt
+++ b/bolt/unittests/Core/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_bolt_unittest(CoreTests
   BinaryContext.cpp
+  ClusteredRows.cpp
   MCPlusBuilder.cpp
   MemoryMaps.cpp
   DynoStats.cpp
diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp
new file mode 100644
index 0000000000000..5901f9ac5aaaa
--- /dev/null
+++ b/bolt/unittests/Core/ClusteredRows.cpp
@@ -0,0 +1,152 @@
+//===- bolt/unittest/Core/ClusteredRows.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/DebugData.h"
+#include "llvm/Support/SMLoc.h"
+#include "gtest/gtest.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+namespace {
+
+class ClusteredRowsTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    Container = std::make_unique<ClasteredRowsContainer>();
+  }
+
+  std::unique_ptr<ClasteredRowsContainer> Container;
+};
+
+TEST_F(ClusteredRowsTest, CreateSingleElement) {
+  ClusteredRows *CR = Container->createClusteredRows(1);
+  ASSERT_NE(CR, nullptr);
+  EXPECT_EQ(CR->size(), 1u);
+
+  // Test population with single element
+  std::vector<DebugLineTableRowRef> TestRefs = {
+      {42, 100}
+  };
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  EXPECT_EQ(Rows.size(), 1u);
+  EXPECT_EQ(Rows[0].DwCompileUnitIndex, 42u);
+  EXPECT_EQ(Rows[0].RowIndex, 100u);
+}
+
+TEST_F(ClusteredRowsTest, CreateMultipleElements) {
+  ClusteredRows *CR = Container->createClusteredRows(3);
+  ASSERT_NE(CR, nullptr);
+  EXPECT_EQ(CR->size(), 3u);
+
+  // Test population with multiple elements
+  std::vector<DebugLineTableRowRef> TestRefs = {
+      {10, 20},
+      {30, 40},
+      {50, 60}
+  };
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  EXPECT_EQ(Rows.size(), 3u);
+
+  EXPECT_EQ(Rows[0].DwCompileUnitIndex, 10u);
+  EXPECT_EQ(Rows[0].RowIndex, 20u);
+
+  EXPECT_EQ(Rows[1].DwCompileUnitIndex, 30u);
+  EXPECT_EQ(Rows[1].RowIndex, 40u);
+
+  EXPECT_EQ(Rows[2].DwCompileUnitIndex, 50u);
+  EXPECT_EQ(Rows[2].RowIndex, 60u);
+}
+
+TEST_F(ClusteredRowsTest, SMLoc_Conversion) {
+  ClusteredRows *CR = Container->createClusteredRows(2);
+  ASSERT_NE(CR, nullptr);
+
+  // Test SMLoc conversion
+  SMLoc Loc = CR->toSMLoc();
+  EXPECT_TRUE(Loc.isValid());
+
+  // Test round-trip conversion
+  const ClusteredRows *CR2 = ClusteredRows::fromSMLoc(Loc);
+  EXPECT_EQ(CR, CR2);
+  EXPECT_EQ(CR2->size(), 2u);
+}
+
+TEST_F(ClusteredRowsTest, PopulateWithArrayRef) {
+  ClusteredRows *CR = Container->createClusteredRows(4);
+  ASSERT_NE(CR, nullptr);
+
+  // Test population with ArrayRef
+  DebugLineTableRowRef TestArray[] = {
+      {1, 2},
+      {3, 4},
+      {5, 6},
+      {7, 8}
+  };
+  ArrayRef<DebugLineTableRowRef> TestRefs(TestArray, 4);
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  EXPECT_EQ(Rows.size(), 4u);
+
+  for (size_t i = 0; i < 4; ++i) {
+    EXPECT_EQ(Rows[i].DwCompileUnitIndex, TestArray[i].DwCompileUnitIndex);
+    EXPECT_EQ(Rows[i].RowIndex, TestArray[i].RowIndex);
+  }
+}
+
+TEST_F(ClusteredRowsTest, MultipleClusteredRows) {
+  // Test creating multiple ClusteredRows objects
+  ClusteredRows *CR1 = Container->createClusteredRows(2);
+  ClusteredRows *CR2 = Container->createClusteredRows(3);
+  ClusteredRows *CR3 = Container->createClusteredRows(1);
+
+  ASSERT_NE(CR1, nullptr);
+  ASSERT_NE(CR2, nullptr);
+  ASSERT_NE(CR3, nullptr);
+
+  // Ensure they are different objects
+  EXPECT_NE(CR1, CR2);
+  EXPECT_NE(CR2, CR3);
+  EXPECT_NE(CR1, CR3);
+
+  // Verify sizes
+  EXPECT_EQ(CR1->size(), 2u);
+  EXPECT_EQ(CR2->size(), 3u);
+  EXPECT_EQ(CR3->size(), 1u);
+
+  // Populate each with different data
+  std::vector<DebugLineTableRowRef> TestRefs1 = {{100, 200}, {300, 400}};
+  std::vector<DebugLineTableRowRef> TestRefs2 = {{10, 20}, {30, 40}, {50, 60}};
+  std::vector<DebugLineTableRowRef> TestRefs3 = {{999, 888}};
+
+  CR1->populate(TestRefs1);
+  CR2->populate(TestRefs2);
+  CR3->populate(TestRefs3);
+
+  // Verify data integrity
+  ArrayRef<DebugLineTableRowRef> Rows1 = CR1->getRows();
+  ArrayRef<DebugLineTableRowRef> Rows2 = CR2->getRows();
+  ArrayRef<DebugLineTableRowRef> Rows3 = CR3->getRows();
+
+  EXPECT_EQ(Rows1[0].DwCompileUnitIndex, 100u);
+  EXPECT_EQ(Rows1[1].RowIndex, 400u);
+
+  EXPECT_EQ(Rows2[1].DwCompileUnitIndex, 30u);
+  EXPECT_EQ(Rows2[2].RowIndex, 60u);
+
+  EXPECT_EQ(Rows3[0].DwCompileUnitIndex, 999u);
+  EXPECT_EQ(Rows3[0].RowIndex, 888u);
+}
+
+} // namespace

>From abcd69590b944e24605c292ae835596115f9284d Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 29 Jul 2025 14:04:57 -0700
Subject: [PATCH 3/3] Fix lint issues

---
 bolt/lib/Core/BinaryContext.cpp       |  6 ++----
 bolt/unittests/Core/ClusteredRows.cpp | 17 +++--------------
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 1766b0540a5cd..df151f398bd54 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1575,14 +1575,12 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
   StringRef Dir = "";
   if (FileNameEntry.DirIdx != 0) {
     if (std::optional<const char *> DirName = dwarf::toString(
-            LineTable->Prologue
-                .IncludeDirectories[FileNameEntry.DirIdx - 1])) {
+            LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) {
       Dir = *DirName;
     }
   }
   StringRef FileName = "";
-  if (std::optional<const char *> FName =
-          dwarf::toString(FileNameEntry.Name))
+  if (std::optional<const char *> FName = dwarf::toString(FileNameEntry.Name))
     FileName = *FName;
   assert(FileName != "");
   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp
index 5901f9ac5aaaa..a75209a75dfad 100644
--- a/bolt/unittests/Core/ClusteredRows.cpp
+++ b/bolt/unittests/Core/ClusteredRows.cpp
@@ -31,9 +31,7 @@ TEST_F(ClusteredRowsTest, CreateSingleElement) {
   EXPECT_EQ(CR->size(), 1u);
 
   // Test population with single element
-  std::vector<DebugLineTableRowRef> TestRefs = {
-      {42, 100}
-  };
+  std::vector<DebugLineTableRowRef> TestRefs = {{42, 100}};
   CR->populate(TestRefs);
 
   ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
@@ -48,11 +46,7 @@ TEST_F(ClusteredRowsTest, CreateMultipleElements) {
   EXPECT_EQ(CR->size(), 3u);
 
   // Test population with multiple elements
-  std::vector<DebugLineTableRowRef> TestRefs = {
-      {10, 20},
-      {30, 40},
-      {50, 60}
-  };
+  std::vector<DebugLineTableRowRef> TestRefs = {{10, 20}, {30, 40}, {50, 60}};
   CR->populate(TestRefs);
 
   ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
@@ -87,12 +81,7 @@ TEST_F(ClusteredRowsTest, PopulateWithArrayRef) {
   ASSERT_NE(CR, nullptr);
 
   // Test population with ArrayRef
-  DebugLineTableRowRef TestArray[] = {
-      {1, 2},
-      {3, 4},
-      {5, 6},
-      {7, 8}
-  };
+  DebugLineTableRowRef TestArray[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
   ArrayRef<DebugLineTableRowRef> TestRefs(TestArray, 4);
   CR->populate(TestRefs);
 



More information about the llvm-commits mailing list