[llvm] [BOLT] Fix debug line emission for functions in multiple compilation units (PR #151230)

Thu Jul 31 10:46:00 PDT 2025

https://github.com/grigorypas updated https://github.com/llvm/llvm-project/pull/151230

>From 74747d14bade85c631b685c77004ac2a32fd0899 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 24 Jul 2025 13:50:13 -0700
Subject: [PATCH 1/7] Change DwarfUnit field to vector in BinaryFunction

---
 bolt/include/bolt/Core/BinaryFunction.h | 27 ++++++---
 bolt/lib/Core/BinaryContext.cpp         | 38 +++++++-----
 bolt/lib/Core/BinaryEmitter.cpp         | 80 ++++++++++++++-----------
 bolt/lib/Core/BinaryFunction.cpp        | 13 +++-
 4 files changed, 95 insertions(+), 63 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae580520b9110..966559e0c6fa6 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -423,8 +423,8 @@ class BinaryFunction {
   /// Original LSDA type encoding
   unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit};
 
-  /// Containing compilation unit for the function.
-  DWARFUnit *DwarfUnit{nullptr};
+  /// All compilation units this function belongs to.
+  SmallVector<DWARFUnit *, 1> DwarfUnitVec;
 
   /// Last computed hash value. Note that the value could be recomputed using
   /// different parameters by every pass.
@@ -2414,15 +2414,24 @@ class BinaryFunction {
   void
   computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const;
 
-  void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; }
+  void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitVec.push_back(Unit); }
 
-  /// Return DWARF compile unit for this function.
-  DWARFUnit *getDWARFUnit() const { return DwarfUnit; }
+  void removeDWARFUnit(DWARFUnit *Unit) {
+    auto *It = std::find(DwarfUnitVec.begin(), DwarfUnitVec.end(), Unit);
+    // If found, erase it
+    if (It != DwarfUnitVec.end()) {
+      DwarfUnitVec.erase(It);
+    }
+  }
+
+  /// Return DWARF compile units for this function.
+  const SmallVector<DWARFUnit *, 1> getDWARFUnits() const {
+    return DwarfUnitVec;
+  }
 
-  /// Return line info table for this function.
-  const DWARFDebugLine::LineTable *getDWARFLineTable() const {
-    return getDWARFUnit() ? BC.DwCtx->getLineTableForUnit(getDWARFUnit())
-                          : nullptr;
+  const DWARFDebugLine::LineTable *
+  getDWARFLineTableForUnit(DWARFUnit *Unit) const {
+    return BC.DwCtx->getLineTableForUnit(Unit);
   }
 
   /// Finalize profile for the function.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 84f1853469709..c58d99a77f8b3 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1697,22 +1697,35 @@ void BinaryContext::preprocessDebugInfo() {
 
     auto It = llvm::partition_point(
         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
-    if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
-      Function.setDWARFUnit(It->Unit);
+    if (It == AllRanges.end() || It->LowPC > FunctionAddress) {
+      continue;
+    }
+    Function.addDWARFUnit(It->Unit);
+
+    // Go forward and add all units from ranges that cover the function
+    while (++It != AllRanges.end()) {
+      if (It->LowPC <= FunctionAddress && FunctionAddress < It->HighPC) {
+        Function.addDWARFUnit(It->Unit);
+      } else {
+        break;
+      }
+    }
   }
 
   // Discover units with debug info that needs to be updated.
   for (const auto &KV : BinaryFunctions) {
     const BinaryFunction &BF = KV.second;
-    if (shouldEmit(BF) && BF.getDWARFUnit())
-      ProcessedCUs.insert(BF.getDWARFUnit());
+    if (shouldEmit(BF) && !BF.getDWARFUnits().empty())
+      for (const DWARFUnit *Unit : BF.getDWARFUnits())
+        ProcessedCUs.insert(Unit);
   }
-
   // Clear debug info for functions from units that we are not going to process.
   for (auto &KV : BinaryFunctions) {
     BinaryFunction &BF = KV.second;
-    if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
-      BF.setDWARFUnit(nullptr);
+    for (auto *Unit : BF.getDWARFUnits()) {
+      if (!ProcessedCUs.count(Unit))
+        BF.removeDWARFUnit(Unit);
+    }
   }
 
   if (opts::Verbosity >= 1) {
@@ -1912,14 +1925,9 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
   if (RowRef == DebugLineTableRowRef::NULL_ROW)
     return;
 
-  const DWARFDebugLine::LineTable *LineTable;
-  if (Function && Function->getDWARFUnit() &&
-      Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
-    LineTable = Function->getDWARFLineTable();
-  } else {
-    LineTable = DwCtx->getLineTableForUnit(
-        DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
-  }
+  const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(
+      DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
+
   assert(LineTable && "line table expected for instruction with debug info");
 
   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 7b5cd276fee89..34bda7403d259 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -177,7 +177,8 @@ class BinaryEmitter {
   /// Note that it does not automatically result in the insertion of the EOS
   /// marker in the line table program, but provides one to the DWARF generator
   /// when it needs it.
-  void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol);
+  void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol,
+                       DWARFUnit *Unit);
 
   /// Emit debug line info for unprocessed functions from CUs that include
   /// emitted functions.
@@ -436,8 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function,
     Streamer.emitELFSize(StartSymbol, SizeExpr);
   }
 
-  if (opts::UpdateDebugSections && Function.getDWARFUnit())
-    emitLineInfoEnd(Function, EndSymbol);
+  // TODO: Emit line info end for all the CUs that contain the function.
+  if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty())
+    emitLineInfoEnd(Function, EndSymbol, Function.getDWARFUnits().front());
 
   // Exception handling info for the function.
   emitLSDA(Function, FF);
@@ -486,7 +488,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
         // A symbol to be emitted before the instruction to mark its location.
         MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
 
-        if (opts::UpdateDebugSections && BF.getDWARFUnit()) {
+        if (opts::UpdateDebugSections && !BF.getDWARFUnits().empty()) {
           LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen,
                                      FirstInstr, InstrLabel);
           FirstInstr = false;
@@ -679,8 +681,10 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
 SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
                                   SMLoc PrevLoc, bool FirstInstr,
                                   MCSymbol *&InstrLabel) {
-  DWARFUnit *FunctionCU = BF.getDWARFUnit();
-  const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable();
+  // TODO: implment emitting into line tables corresponding to multiple CUs
+  DWARFUnit *FunctionCU = BF.getDWARFUnits().front();
+  const DWARFDebugLine::LineTable *FunctionLineTable =
+      BF.getDWARFLineTableForUnit(FunctionCU);
   assert(FunctionCU && "cannot emit line info for function without CU");
 
   DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);
@@ -740,13 +744,13 @@ SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
 }
 
 void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF,
-                                    MCSymbol *FunctionEndLabel) {
-  DWARFUnit *FunctionCU = BF.getDWARFUnit();
-  assert(FunctionCU && "DWARF unit expected");
+                                    MCSymbol *FunctionEndLabel,
+                                    DWARFUnit *Unit) {
+  assert(Unit && "DWARF unit expected");
   BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0);
   const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
   BC.Ctx->clearDwarfLocSeen();
-  BC.getDwarfLineTable(FunctionCU->getOffset())
+  BC.getDwarfLineTable(Unit->getOffset())
       .getMCLineSections()
       .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc),
                     Streamer.getCurrentSectionOnly());
@@ -1115,36 +1119,40 @@ void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() {
     if (Function.isEmitted())
       continue;
 
-    const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable();
-    if (!LineTable)
-      continue; // nothing to update for this function
+    // Loop through all CUs in the function
+    for (DWARFUnit *Unit : Function.getDWARFUnits()) {
+      const DWARFDebugLine::LineTable *LineTable =
+          Function.getDWARFLineTableForUnit(Unit);
+      if (!LineTable)
+        continue; // nothing to update for this unit
+
+      const uint64_t Address = Function.getAddress();
+      std::vector<uint32_t> Results;
+      if (!LineTable->lookupAddressRange(
+              {Address, object::SectionedAddress::UndefSection},
+              Function.getSize(), Results))
+        continue;
 
-    const uint64_t Address = Function.getAddress();
-    std::vector<uint32_t> Results;
-    if (!LineTable->lookupAddressRange(
-            {Address, object::SectionedAddress::UndefSection},
-            Function.getSize(), Results))
-      continue;
+      if (Results.empty())
+        continue;
 
-    if (Results.empty())
-      continue;
+      // The first row returned could be the last row matching the start
+      // address. Find the first row with the same address that is not the end
+      // of the sequence.
+      uint64_t FirstRow = Results.front();
+      while (FirstRow > 0) {
+        const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1];
+        if (PrevRow.Address.Address != Address || PrevRow.EndSequence)
+          break;
+        --FirstRow;
+      }
 
-    // The first row returned could be the last row matching the start address.
-    // Find the first row with the same address that is not the end of the
-    // sequence.
-    uint64_t FirstRow = Results.front();
-    while (FirstRow > 0) {
-      const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1];
-      if (PrevRow.Address.Address != Address || PrevRow.EndSequence)
-        break;
-      --FirstRow;
+      const uint64_t EndOfSequenceAddress =
+          Function.getAddress() + Function.getMaxSize();
+      BC.getDwarfLineTable(Unit->getOffset())
+          .addLineTableSequence(LineTable, FirstRow, Results.back(),
+                                EndOfSequenceAddress);
     }
-
-    const uint64_t EndOfSequenceAddress =
-        Function.getAddress() + Function.getMaxSize();
-    BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset())
-        .addLineTableSequence(LineTable, FirstRow, Results.back(),
-                              EndOfSequenceAddress);
   }
 
   // For units that are completely unprocessed, use original debug line contents
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..bbe04a17c0ad3 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1496,9 +1496,16 @@ Error BinaryFunction::disassemble() {
     }
 
 add_instruction:
-    if (getDWARFLineTable()) {
-      Instruction.setLoc(findDebugLineInformationForInstructionAt(
-          AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
+    // TODO: Handle multiple DWARF compilation units properly.
+    // For now, use the first unit if available.
+    if (!getDWARFUnits().empty()) {
+      DWARFUnit *FirstUnit = getDWARFUnits().front();
+      const DWARFDebugLine::LineTable *LineTable =
+          getDWARFLineTableForUnit(FirstUnit);
+      if (LineTable) {
+        Instruction.setLoc(findDebugLineInformationForInstructionAt(
+            AbsoluteInstrAddr, FirstUnit, LineTable));
+      }
     }
 
     // Record offset of the instruction for profile matching.

>From 885937d5529f3ba2b047d8658ed85aa34b515ffe Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 24 Jul 2025 20:30:10 -0700
Subject: [PATCH 2/7] Implemented multiple rows per instruction logic

---
 bolt/include/bolt/Core/BinaryContext.h   |   6 +
 bolt/include/bolt/Core/DebugData.h       | 116 +++++++++++++----
 bolt/lib/Core/BinaryContext.cpp          |  27 ++--
 bolt/lib/Core/BinaryEmitter.cpp          | 136 ++++++++++++--------
 bolt/lib/Core/BinaryFunction.cpp         |  58 ++++-----
 bolt/lib/Core/DebugData.cpp              |   2 -
 bolt/test/Inputs/multi-cu-common.h       |  10 ++
 bolt/test/Inputs/multi-cu-file1.c        |   9 ++
 bolt/test/Inputs/multi-cu-file2.c        |   8 ++
 bolt/test/Inputs/process-debug-line.sh   | 101 +++++++++++++++
 bolt/test/X86/multi-cu-debug-line.test   | 108 ++++++++++++++++
 bolt/test/perf2bolt/Inputs/perf_test.lds |  11 +-
 bolt/unittests/Core/CMakeLists.txt       |   1 +
 bolt/unittests/Core/ClusteredRows.cpp    | 152 +++++++++++++++++++++++
 14 files changed, 621 insertions(+), 124 deletions(-)
 create mode 100644 bolt/test/Inputs/multi-cu-common.h
 create mode 100644 bolt/test/Inputs/multi-cu-file1.c
 create mode 100644 bolt/test/Inputs/multi-cu-file2.c
 create mode 100755 bolt/test/Inputs/process-debug-line.sh
 create mode 100644 bolt/test/X86/multi-cu-debug-line.test
 create mode 100644 bolt/unittests/Core/ClusteredRows.cpp

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 91ecf89da618c..48bc9a5d1f92c 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -288,6 +288,12 @@ class BinaryContext {
   /// overwritten, but it is okay to re-generate debug info for them.
   std::set<const DWARFUnit *> ProcessedCUs;
 
+  /// DWARF-related container to manage lifecycle of groups of rows from line
+  /// tables associated with instructions. Since binary functions can span
+  /// multiple compilation units, instructions may reference debug line
+  /// information from multiple CUs.
+  ClasteredRowsContainer ClasteredRows;
+
   // Setup MCPlus target builder
   void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
     MIB = std::move(TargetBuilder);
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 6ea3b1af1024f..048594946d8a9 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -135,8 +135,6 @@ struct DebugLineTableRowRef {
   uint32_t DwCompileUnitIndex;
   uint32_t RowIndex;
 
-  const static DebugLineTableRowRef NULL_ROW;
-
   bool operator==(const DebugLineTableRowRef &Rhs) const {
     return DwCompileUnitIndex == Rhs.DwCompileUnitIndex &&
            RowIndex == Rhs.RowIndex;
@@ -145,24 +143,6 @@ struct DebugLineTableRowRef {
   bool operator!=(const DebugLineTableRowRef &Rhs) const {
     return !(*this == Rhs);
   }
-
-  static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) {
-    union {
-      decltype(Loc.getPointer()) Ptr;
-      DebugLineTableRowRef Ref;
-    } U;
-    U.Ptr = Loc.getPointer();
-    return U.Ref;
-  }
-
-  SMLoc toSMLoc() const {
-    union {
-      decltype(SMLoc().getPointer()) Ptr;
-      DebugLineTableRowRef Ref;
-    } U;
-    U.Ref = *this;
-    return SMLoc::getFromPointer(U.Ptr);
-  }
 };
 
 /// Common buffer vector used for debug info handling.
@@ -210,7 +190,7 @@ class DebugRangesSectionWriter {
   static bool classof(const DebugRangesSectionWriter *Writer) {
     return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
   }
-  
+
   /// Append a range to the main buffer.
   void appendToRangeBuffer(const DebugBufferVector &CUBuffer);
 
@@ -852,6 +832,100 @@ class DwarfLineTable {
   // Returns DWARF Version for this line table.
   uint16_t getDwarfVersion() const { return DwarfVersion; }
 };
+
+/// ClusteredRows represents a collection of debug line table row references.
+/// Since a Binary function can belong to multiple compilation units (CUs),
+/// a single MCInst can have multiple debug line table rows associated with it
+/// from different CUs. This class manages such clustered row references.
+///
+/// MEMORY LAYOUT AND DESIGN:
+/// This class uses a flexible array member pattern to store all
+/// DebugLineTableRowRef elements in a single contiguous memory allocation.
+/// The memory layout is:
+///
+/// +------------------+
+/// | ClusteredRows    |  <- Object header (Size + first element)
+/// | - Size           |
+/// | - Raws (element) |  <- First DebugLineTableRowRef element
+/// +------------------+
+/// | element[1]       |  <- Additional DebugLineTableRowRef elements
+/// | element[2]       |     stored immediately after the object
+/// | ...              |
+/// | element[Size-1]  |
+/// +------------------+
+///
+/// PERFORMANCE BENEFITS:
+/// - Single memory allocation: All elements are stored in one contiguous block,
+///   eliminating the need for separate heap allocations for the array.
+/// - No extra dereferencing: Elements are accessed directly via pointer
+///   arithmetic (beginPtr() + offset) rather than through an additional
+///   pointer indirection.
+/// - Cache locality: All elements are guaranteed to be adjacent in memory,
+///   improving cache performance during iteration.
+/// - Memory efficiency: No overhead from separate pointer storage or
+///   fragmented allocations.
+///
+/// The 'Raws' member serves as both the first element storage and the base
+/// address for pointer arithmetic to access subsequent elements.
+class ClusteredRows {
+public:
+  ArrayRef<DebugLineTableRowRef> getRows() const {
+    return ArrayRef<DebugLineTableRowRef>(beginPtrConst(), Size);
+  }
+  uint64_t size() const { return Size; }
+  static const ClusteredRows *fromSMLoc(const SMLoc &Loc) {
+    return reinterpret_cast<const ClusteredRows *>(Loc.getPointer());
+  }
+  SMLoc toSMLoc() const {
+    return SMLoc::getFromPointer(reinterpret_cast<const char *>(this));
+  }
+
+  template <typename T> void populate(const T Vec) {
+    assert(Vec.size() == Size && "");
+    DebugLineTableRowRef *CurRawPtr = beginPtr();
+    for (DebugLineTableRowRef RowRef : Vec) {
+      *CurRawPtr = RowRef;
+      ++CurRawPtr;
+    }
+  }
+
+private:
+  uint64_t Size;
+  DebugLineTableRowRef Raws;
+
+  ClusteredRows(uint64_t Size) : Size(Size) {}
+  static uint64_t getTotalSize(uint64_t Size) {
+    assert(Size > 0 && "Size must be greater than 0");
+    return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef);
+  }
+  const DebugLineTableRowRef *beginPtrConst() const {
+    return reinterpret_cast<const DebugLineTableRowRef *>(&Raws);
+  }
+  DebugLineTableRowRef *beginPtr() {
+    return reinterpret_cast<DebugLineTableRowRef *>(&Raws);
+  }
+
+  friend class ClasteredRowsContainer;
+};
+
+/// ClasteredRowsContainer manages the lifecycle of ClusteredRows objects.
+class ClasteredRowsContainer {
+public:
+  ClusteredRows *createClusteredRows(uint64_t Size) {
+    auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size)))
+        ClusteredRows(Size);
+    Clusters.push_back(CR);
+    return CR;
+  }
+  ~ClasteredRowsContainer() {
+    for (auto *CR : Clusters)
+      std::free(CR);
+  }
+
+private:
+  std::vector<ClusteredRows *> Clusters;
+};
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index c58d99a77f8b3..1766b0540a5cd 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1568,23 +1568,21 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
   const DWARFDebugLine::LineTable *LineTable =
       DwCtx->getLineTableForUnit(SrcUnit);
-  const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
-      LineTable->Prologue.FileNames;
+  const DWARFDebugLine::FileNameEntry &FileNameEntry =
+      LineTable->Prologue.getFileNameEntry(FileIndex);
   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
   // means empty dir.
-  assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
-         "FileIndex out of range for the compilation unit.");
   StringRef Dir = "";
-  if (FileNames[FileIndex - 1].DirIdx != 0) {
+  if (FileNameEntry.DirIdx != 0) {
     if (std::optional<const char *> DirName = dwarf::toString(
             LineTable->Prologue
-                .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
+                .IncludeDirectories[FileNameEntry.DirIdx - 1])) {
       Dir = *DirName;
     }
   }
   StringRef FileName = "";
   if (std::optional<const char *> FName =
-          dwarf::toString(FileNames[FileIndex - 1].Name))
+          dwarf::toString(FileNameEntry.Name))
     FileName = *FName;
   assert(FileName != "");
   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
@@ -1920,20 +1918,25 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
                            const BinaryFunction *Function,
                            DWARFContext *DwCtx) {
-  DebugLineTableRowRef RowRef =
-      DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
-  if (RowRef == DebugLineTableRowRef::NULL_ROW)
+  const ClusteredRows *LineTableRows =
+      ClusteredRows::fromSMLoc(Instruction.getLoc());
+  if (LineTableRows == nullptr)
     return;
 
+  // File name and line number should be the same for all CUs.
+  // So it is sufficient to check the first one.
+  DebugLineTableRowRef RowRef = LineTableRows->getRows().front();
   const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(
       DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
 
-  assert(LineTable && "line table expected for instruction with debug info");
+  if (!LineTable)
+    return;
 
   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
   StringRef FileName = "";
+
   if (std::optional<const char *> FName =
-          dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
+          dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name))
     FileName = *FName;
   OS << " # debug line " << FileName << ":" << Row.Line;
   if (Row.Column)
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 34bda7403d259..8862f0680cb7e 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -437,9 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function,
     Streamer.emitELFSize(StartSymbol, SizeExpr);
   }
 
-  // TODO: Emit line info end for all the CUs that contain the function.
   if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty())
-    emitLineInfoEnd(Function, EndSymbol, Function.getDWARFUnits().front());
+    for (DWARFUnit *Unit : Function.getDWARFUnits())
+      emitLineInfoEnd(Function, EndSymbol, Unit);
 
   // Exception handling info for the function.
   emitLSDA(Function, FF);
@@ -681,64 +681,92 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
 SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
                                   SMLoc PrevLoc, bool FirstInstr,
                                   MCSymbol *&InstrLabel) {
-  // TODO: implment emitting into line tables corresponding to multiple CUs
-  DWARFUnit *FunctionCU = BF.getDWARFUnits().front();
-  const DWARFDebugLine::LineTable *FunctionLineTable =
-      BF.getDWARFLineTableForUnit(FunctionCU);
-  assert(FunctionCU && "cannot emit line info for function without CU");
-
-  DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);
-
-  // Check if no new line info needs to be emitted.
-  if (RowReference == DebugLineTableRowRef::NULL_ROW ||
+  if (NewLoc.getPointer() == nullptr ||
       NewLoc.getPointer() == PrevLoc.getPointer())
     return PrevLoc;
+  const ClusteredRows *Cluster = ClusteredRows::fromSMLoc(NewLoc);
+
+  auto addToLineTable = [&](DebugLineTableRowRef RowReference,
+                            const DWARFUnit *TargetCU, unsigned Flags,
+                            MCSymbol *InstrLabel,
+                            const DWARFDebugLine::Row &CurrentRow) {
+    const uint64_t TargetUnitIndex = TargetCU->getOffset();
+    unsigned TargetFilenum = CurrentRow.File;
+    const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex;
+    // If the CU id from the current instruction location does not
+    // match the target CU id, it means that we have come across some
+    // inlined code (by BOLT).  We must look up the CU for the instruction's
+    // original function and get the line table from that.
+    if (TargetUnitIndex != CurrentUnitIndex) {
+      // Add filename from the inlined function to the current CU.
+      TargetFilenum = BC.addDebugFilenameToUnit(
+          TargetUnitIndex, CurrentUnitIndex, CurrentRow.File);
+    }
+    BC.Ctx->setCurrentDwarfLoc(TargetFilenum, CurrentRow.Line,
+                               CurrentRow.Column, Flags, CurrentRow.Isa,
+                               CurrentRow.Discriminator);
+    const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
+    BC.Ctx->clearDwarfLocSeen();
+    auto &MapLineEntries = BC.getDwarfLineTable(TargetUnitIndex)
+                               .getMCLineSections()
+                               .getMCLineEntries();
+    const auto *It = MapLineEntries.find(Streamer.getCurrentSectionOnly());
+    auto NewLineEntry = MCDwarfLineEntry(InstrLabel, DwarfLoc);
+
+    // Check if line table exists and has entries before doing comparison
+    if (It != MapLineEntries.end() && !It->second.empty()) {
+      // Check if the new line entry has the same debug info as the last one
+      // to avoid duplicates. We don't compare labels since different
+      // instructions can have the same line info.
+      const auto &LastEntry = It->second.back();
+      if (LastEntry.getFileNum() == NewLineEntry.getFileNum() &&
+          LastEntry.getLine() == NewLineEntry.getLine() &&
+          LastEntry.getColumn() == NewLineEntry.getColumn() &&
+          LastEntry.getFlags() == NewLineEntry.getFlags() &&
+          LastEntry.getIsa() == NewLineEntry.getIsa() &&
+          LastEntry.getDiscriminator() == NewLineEntry.getDiscriminator())
+        return;
+    }
 
-  unsigned CurrentFilenum = 0;
-  const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable;
-
-  // If the CU id from the current instruction location does not
-  // match the CU id from the current function, it means that we
-  // have come across some inlined code.  We must look up the CU
-  // for the instruction's original function and get the line table
-  // from that.
-  const uint64_t FunctionUnitIndex = FunctionCU->getOffset();
-  const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex;
-  if (CurrentUnitIndex != FunctionUnitIndex) {
-    CurrentLineTable = BC.DwCtx->getLineTableForUnit(
-        BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex));
-    // Add filename from the inlined function to the current CU.
-    CurrentFilenum = BC.addDebugFilenameToUnit(
-        FunctionUnitIndex, CurrentUnitIndex,
-        CurrentLineTable->Rows[RowReference.RowIndex - 1].File);
-  }
-
-  const DWARFDebugLine::Row &CurrentRow =
-      CurrentLineTable->Rows[RowReference.RowIndex - 1];
-  if (!CurrentFilenum)
-    CurrentFilenum = CurrentRow.File;
-
-  unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) |
-                   (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) |
-                   (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) |
-                   (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin);
-
-  // Always emit is_stmt at the beginning of function fragment.
-  if (FirstInstr)
-    Flags |= DWARF2_FLAG_IS_STMT;
-
-  BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column,
-                             Flags, CurrentRow.Isa, CurrentRow.Discriminator);
-  const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
-  BC.Ctx->clearDwarfLocSeen();
+    BC.getDwarfLineTable(TargetUnitIndex)
+        .getMCLineSections()
+        .addLineEntry(NewLineEntry, Streamer.getCurrentSectionOnly());
+  };
 
   if (!InstrLabel)
     InstrLabel = BC.Ctx->createTempSymbol();
-
-  BC.getDwarfLineTable(FunctionUnitIndex)
-      .getMCLineSections()
-      .addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc),
-                    Streamer.getCurrentSectionOnly());
+  for (DebugLineTableRowRef RowReference : Cluster->getRows()) {
+    const DWARFDebugLine::LineTable *CurrentLineTable =
+        BC.DwCtx->getLineTableForUnit(
+            BC.DwCtx->getCompileUnitForOffset(RowReference.DwCompileUnitIndex));
+    const DWARFDebugLine::Row &CurrentRow =
+        CurrentLineTable->Rows[RowReference.RowIndex - 1];
+    unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) |
+                     (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) |
+                     (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) |
+                     (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin);
+
+    // Always emit is_stmt at the beginning of function fragment.
+    if (FirstInstr)
+      Flags |= DWARF2_FLAG_IS_STMT;
+    const auto &FunctionDwarfUnits = BF.getDWARFUnits();
+    const auto *It = std::find_if(
+        FunctionDwarfUnits.begin(), FunctionDwarfUnits.end(),
+        [RowReference](const DWARFUnit *Unit) {
+          return Unit->getOffset() == RowReference.DwCompileUnitIndex;
+        });
+    if (It != FunctionDwarfUnits.end()) {
+      addToLineTable(RowReference, *It, Flags, InstrLabel, CurrentRow);
+      continue;
+    }
+    // This row is from a CU that did not contain the original function.
+    // This might happen if BOLT moved/inlined that instruction from other CUs.
+    // In this case, we need to insert it into all CUs that the function
+    // originally belonged to.
+    for (const DWARFUnit *Unit : BF.getDWARFUnits()) {
+      addToLineTable(RowReference, Unit, Flags, InstrLabel, CurrentRow);
+    }
+  }
 
   return NewLoc;
 }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index bbe04a17c0ad3..a3a6b31451441 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -179,37 +179,29 @@ template <typename R> static bool emptyRange(const R &Range) {
 }
 
 /// Gets debug line information for the instruction located at the given
-/// address in the original binary. The SMLoc's pointer is used
-/// to point to this information, which is represented by a
-/// DebugLineTableRowRef. The returned pointer is null if no debug line
-/// information for this instruction was found.
-static SMLoc findDebugLineInformationForInstructionAt(
+/// address in the original binary. Returns an optional DebugLineTableRowRef
+/// that references the corresponding row in the DWARF line table. Since binary
+/// functions can span multiple compilation units, this function helps
+/// associate instructions with their debug line information from the
+/// appropriate CU. Returns std::nullopt if no debug line information for
+/// this instruction was found.
+static std::optional<DebugLineTableRowRef>
+findDebugLineInformationForInstructionAt(
     uint64_t Address, DWARFUnit *Unit,
     const DWARFDebugLine::LineTable *LineTable) {
-  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
-  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
-  // the pointer itself.
-  static_assert(
-      sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef),
-      "Cannot fit instruction debug line information into SMLoc's pointer");
-
-  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
   uint32_t RowIndex = LineTable->lookupAddress(
       {Address, object::SectionedAddress::UndefSection});
   if (RowIndex == LineTable->UnknownRowIndex)
-    return NullResult;
+    return std::nullopt;
 
   assert(RowIndex < LineTable->Rows.size() &&
          "Line Table lookup returned invalid index.");
 
-  decltype(SMLoc().getPointer()) Ptr;
-  DebugLineTableRowRef *InstructionLocation =
-      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
-
-  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
-  InstructionLocation->RowIndex = RowIndex + 1;
+  DebugLineTableRowRef InstructionLocation;
+  InstructionLocation.DwCompileUnitIndex = Unit->getOffset();
+  InstructionLocation.RowIndex = RowIndex + 1;
 
-  return SMLoc::getFromPointer(Ptr);
+  return InstructionLocation;
 }
 
 static std::string buildSectionName(StringRef Prefix, StringRef Name,
@@ -1496,15 +1488,23 @@ Error BinaryFunction::disassemble() {
     }
 
 add_instruction:
-    // TODO: Handle multiple DWARF compilation units properly.
-    // For now, use the first unit if available.
     if (!getDWARFUnits().empty()) {
-      DWARFUnit *FirstUnit = getDWARFUnits().front();
-      const DWARFDebugLine::LineTable *LineTable =
-          getDWARFLineTableForUnit(FirstUnit);
-      if (LineTable) {
-        Instruction.setLoc(findDebugLineInformationForInstructionAt(
-            AbsoluteInstrAddr, FirstUnit, LineTable));
+      SmallVector<DebugLineTableRowRef, 1> Rows;
+      for (DWARFUnit *Unit : getDWARFUnits()) {
+        const DWARFDebugLine::LineTable *LineTable =
+            getDWARFLineTableForUnit(Unit);
+        if (!LineTable)
+          continue;
+        if (std::optional<DebugLineTableRowRef> RowRef =
+                findDebugLineInformationForInstructionAt(AbsoluteInstrAddr,
+                                                         Unit, LineTable))
+          Rows.emplace_back(*RowRef);
+      }
+      if (!Rows.empty()) {
+        ClusteredRows *Cluster =
+            BC.ClasteredRows.createClusteredRows(Rows.size());
+        Cluster->populate(Rows);
+        Instruction.setLoc(Cluster->toSMLoc());
       }
     }
 
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index 521eb8d91bbc0..e05f28f08572c 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -101,8 +101,6 @@ std::optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
   return findAttributeInfo(DIE, AbbrevDecl, *Index);
 }
 
-const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0};
-
 LLVM_ATTRIBUTE_UNUSED
 static void printLE64(const std::string &S) {
   for (uint32_t I = 0, Size = S.size(); I < Size; ++I) {
diff --git a/bolt/test/Inputs/multi-cu-common.h b/bolt/test/Inputs/multi-cu-common.h
new file mode 100644
index 0000000000000..aeb8076305dce
--- /dev/null
+++ b/bolt/test/Inputs/multi-cu-common.h
@@ -0,0 +1,10 @@
+#ifndef MULTI_CU_COMMON_H
+#define MULTI_CU_COMMON_H
+
+static inline int common_inline_function(int x) {
+  int result = x * 2;
+  result += 10;
+  return result;
+}
+
+#endif // MULTI_CU_COMMON_H
diff --git a/bolt/test/Inputs/multi-cu-file1.c b/bolt/test/Inputs/multi-cu-file1.c
new file mode 100644
index 0000000000000..f3528b2acddb8
--- /dev/null
+++ b/bolt/test/Inputs/multi-cu-file1.c
@@ -0,0 +1,9 @@
+#include "multi-cu-common.h"
+#include <stdio.h>
+
+int main() {
+  int value = 5;
+  int result = common_inline_function(value);
+  printf("File1: Result is %d\n", result);
+  return 0;
+}
diff --git a/bolt/test/Inputs/multi-cu-file2.c b/bolt/test/Inputs/multi-cu-file2.c
new file mode 100644
index 0000000000000..f33af72595afe
--- /dev/null
+++ b/bolt/test/Inputs/multi-cu-file2.c
@@ -0,0 +1,8 @@
+#include "multi-cu-common.h"
+#include <stdio.h>
+
+void helper_function() {
+  int value = 10;
+  int result = common_inline_function(value);
+  printf("File2: Helper result is %d\n", result);
+}
diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/Inputs/process-debug-line.sh
new file mode 100755
index 0000000000000..b30408df922eb
--- /dev/null
+++ b/bolt/test/Inputs/process-debug-line.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+# Script to process llvm-dwarfdump --debug-line output and create a normalized table
+# Usage: process-debug-line.sh <debug-line.txt>
+#
+# Output format: CU_FILE LINE COLUMN FILE_NAME [additional_info]
+# This strips addresses to make rows unique and adds context about which CU and file each line belongs to
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <debug-line.txt>" >&2
+    exit 1
+fi
+
+debug_line_file="$1"
+
+if [ ! -f "$debug_line_file" ]; then
+    echo "Error: File '$debug_line_file' not found" >&2
+    exit 1
+fi
+
+awk '
+BEGIN {
+    cu_count = 0
+    current_cu_file = ""
+    # Initialize file names array
+    for (i = 0; i < 100; i++) current_file_names[i] = ""
+}
+
+# Track debug_line sections (new CU)
+/^debug_line\[/ {
+    cu_count++
+    current_cu_file = ""
+    # Clear file names array for new CU
+    for (i = 0; i < 100; i++) current_file_names[i] = ""
+    next
+}
+
+# Capture file names and their indices
+/^file_names\[.*\]:/ {
+    # Extract file index using more portable regex
+    if (match($0, /file_names\[[[:space:]]*([0-9]+)\]:/, arr)) {
+        file_index = arr[1]
+    } else {
+        # Fallback parsing
+        gsub(/file_names\[/, "", $0)
+        gsub(/\]:.*/, "", $0)
+        gsub(/[[:space:]]/, "", $0)
+        file_index = $0
+    }
+
+    getline  # Read the next line which contains the actual filename
+    if (match($0, /name:[[:space:]]*"([^"]*)"/, name_arr)) {
+        filename = name_arr[1]
+        current_file_names[file_index] = filename
+
+        # Extract basename for main CU file (first .c/.cpp/.cc file we see)
+        if (current_cu_file == "" && match(filename, /([^\/]*\.(c|cpp|cc))$/, cu_arr)) {
+            current_cu_file = cu_arr[1]
+        }
+    }
+    next
+}
+
+# Process line table entries
+/^0x[0-9a-f]+/ {
+    # Parse the line entry: Address Line Column File ISA Discriminator OpIndex Flags
+    if (NF >= 4) {
+        line = $2
+        column = $3
+        file_index = $4
+
+        # Get the filename for this file index
+        filename = current_file_names[file_index]
+        if (filename == "") {
+            filename = "UNKNOWN_FILE_" file_index
+        } else {
+            # Extract just the basename using portable method
+            if (match(filename, /([^\/]*)$/, basename_arr)) {
+                filename = basename_arr[1]
+            } else {
+                # Fallback: use gsub
+                gsub(/.*\//, "", filename)
+            }
+        }
+
+        # Build additional info (flags, etc.)
+        additional_info = ""
+        for (i = 8; i <= NF; i++) {
+            if (additional_info != "") additional_info = additional_info " "
+            additional_info = additional_info $i
+        }
+
+        # Output normalized row: CU_FILE LINE COLUMN FILE_NAME [additional_info]
+        printf "%s %s %s %s", current_cu_file, line, column, filename
+        if (additional_info != "") {
+            printf " %s", additional_info
+        }
+        printf "\n"
+    }
+}
+' "$debug_line_file"
diff --git a/bolt/test/X86/multi-cu-debug-line.test b/bolt/test/X86/multi-cu-debug-line.test
new file mode 100644
index 0000000000000..430b281445db5
--- /dev/null
+++ b/bolt/test/X86/multi-cu-debug-line.test
@@ -0,0 +1,108 @@
+## Test that BOLT correctly handles debug line information for functions
+## that belong to multiple compilation units (e.g., inline functions in
+## common header files). The test covers two scenarios:
+## 1. Normal processing: .debug_line section shows lines for the function 
+##    in all CUs where it was compiled, with no duplicate rows within CUs
+## 2. Functions not processed: When BOLT doesn't process functions (using 
+##    --funcs with nonexistent function), original debug info is preserved
+
+# REQUIRES: system-linux
+
+## Compile test files with debug info
+# RUN: %clang %cflags -O0 -g %S/../Inputs/multi-cu-file1.c %S/../Inputs/multi-cu-file2.c \
+# RUN:   -I%S/../Inputs -o %t.exe -Wl,-q
+
+## Test 1: Normal BOLT processing (functions are processed/optimized)
+# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --debug-line %t.bolt > %t.debug-line.txt
+# RUN: FileCheck %s --check-prefix=BASIC --input-file %t.debug-line.txt
+
+## Check that debug line information is present for both compilation units
+# BASIC: debug_line[{{.*}}]
+# BASIC: file_names[{{.*}}]:
+# BASIC: name: "{{.*}}multi-cu-file1.c"
+# BASIC: debug_line[{{.*}}]
+# BASIC: file_names[{{.*}}]:
+# BASIC: name: "{{.*}}multi-cu-file2.c"
+
+## Use our helper script to create a normalized table without addresses
+# RUN: %S/../Inputs/process-debug-line.sh %t.debug-line.txt > %t.normalized-debug-line.txt
+# RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt
+
+## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h
+## in both compilation units
+# NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h
+# NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h
+
+## Verify that we have line entries for the inline function in multiple CUs
+## by checking that the header file appears multiple times in different contexts
+# RUN: grep -c "multi-cu-common.h" %t.debug-line.txt > %t.header-count.txt
+# RUN: FileCheck %s --check-prefix=MULTI-CU --input-file %t.header-count.txt
+
+## The header should appear in debug line info for multiple CUs
+# MULTI-CU: {{[2-9]|[1-9][0-9]+}}
+
+## Check that there are no duplicate line table rows within the same CU
+## This verifies the fix for the bug where duplicate entries were created
+# RUN: sort %t.normalized-debug-line.txt | uniq -c | \
+# RUN:   awk '$1 > 1 {print "DUPLICATE_ROW: " $0}' > %t.duplicates.txt
+# RUN: FileCheck %s --check-prefix=NO-DUPLICATES --input-file %t.duplicates.txt --allow-empty
+
+## Should have no duplicate normalized rows (file should be empty)
+## Note: Cross-CU duplicates are expected and valid (same function in different CUs)
+## but within-CU duplicates would indicate a bug
+# NO-DUPLICATES-NOT: DUPLICATE_ROW
+
+## Test 2: Functions not processed by BOLT (using --funcs with nonexistent function)
+## This tests the code path where BOLT preserves original debug info
+# RUN: llvm-bolt %t.exe -o %t.not-emitted.bolt --update-debug-sections --funcs=nonexistent_function
+# RUN: llvm-dwarfdump --debug-line %t.not-emitted.bolt > %t.not-emitted.debug-line.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-BASIC --input-file %t.not-emitted.debug-line.txt
+
+## Check that debug line information is still present for both compilation units when functions aren't processed
+# PRESERVED-BASIC: debug_line[{{.*}}]
+# PRESERVED-BASIC: file_names[{{.*}}]:
+# PRESERVED-BASIC: name: "{{.*}}multi-cu-file1.c"
+# PRESERVED-BASIC: debug_line[{{.*}}]
+# PRESERVED-BASIC: file_names[{{.*}}]:
+# PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c"
+
+## Create normalized output for the not-emitted case
+# RUN: %S/../Inputs/process-debug-line.sh %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt
+
+## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h
+## in both compilation units (preserved from original)
+# PRESERVED-NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h
+
+## Verify that we have line entries for the inline function in multiple CUs (preserved)
+## by checking that the header file appears multiple times in different contexts
+# RUN: grep -c "multi-cu-common.h" %t.not-emitted.debug-line.txt > %t.preserved-header-count.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-MULTI-CU --input-file %t.preserved-header-count.txt
+
+## The header should appear in debug line info for multiple CUs (preserved from original)
+# PRESERVED-MULTI-CU: {{[2-9]|[1-9][0-9]+}}
+
+## Check that original debug info is preserved for main functions
+# RUN: grep "multi-cu-file1.c.*multi-cu-file1.c" %t.not-emitted.normalized.txt > %t.preserved-main.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-MAIN --input-file %t.preserved-main.txt
+
+# PRESERVED-MAIN: multi-cu-file1.c {{[0-9]+}} {{[0-9]+}} multi-cu-file1.c
+
+## Check that original debug info is preserved for file2 functions
+# RUN: grep "multi-cu-file2.c.*multi-cu-file2.c" %t.not-emitted.normalized.txt > %t.preserved-file2.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-FILE2 --input-file %t.preserved-file2.txt
+
+# PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c
+
+## Note: We do not check for duplicates in Test 2 since we are preserving original debug info as-is
+## and the original may contain patterns that would be flagged as duplicates by our normalization
\ No newline at end of file
diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds
index 66d925a05bebc..c2704d73a638c 100644
--- a/bolt/test/perf2bolt/Inputs/perf_test.lds
+++ b/bolt/test/perf2bolt/Inputs/perf_test.lds
@@ -1,13 +1,12 @@
 SECTIONS {
-  . = SIZEOF_HEADERS;
+  . = 0x400000 + SIZEOF_HEADERS;
   .interp : { *(.interp) }
   .note.gnu.build-id : { *(.note.gnu.build-id) }
-  . = 0x212e8;
   .dynsym         : { *(.dynsym) }
-  . = 0x31860;
+  . = 0x801000;
   .text : { *(.text*) }
-  . = 0x41c20;
+  . = 0x803000;
   .fini_array : { *(.fini_array) }
-  . = 0x54e18;
+  . = 0x805000;
   .data : { *(.data) }
-}
\ No newline at end of file
+}
diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt
index 54e8ea10cda12..538add9baa798 100644
--- a/bolt/unittests/Core/CMakeLists.txt
+++ b/bolt/unittests/Core/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_bolt_unittest(CoreTests
   BinaryContext.cpp
+  ClusteredRows.cpp
   MCPlusBuilder.cpp
   MemoryMaps.cpp
   DynoStats.cpp
diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp
new file mode 100644
index 0000000000000..5901f9ac5aaaa
--- /dev/null
+++ b/bolt/unittests/Core/ClusteredRows.cpp
@@ -0,0 +1,152 @@
+//===- bolt/unittest/Core/ClusteredRows.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/DebugData.h"
+#include "llvm/Support/SMLoc.h"
+#include "gtest/gtest.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+namespace {
+
+class ClusteredRowsTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    Container = std::make_unique<ClasteredRowsContainer>();
+  }
+
+  std::unique_ptr<ClasteredRowsContainer> Container;
+};
+
+TEST_F(ClusteredRowsTest, CreateSingleElement) {
+  ClusteredRows *CR = Container->createClusteredRows(1);
+  ASSERT_NE(CR, nullptr);
+  EXPECT_EQ(CR->size(), 1u);
+
+  // Test population with single element
+  std::vector<DebugLineTableRowRef> TestRefs = {
+      {42, 100}
+  };
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  EXPECT_EQ(Rows.size(), 1u);
+  EXPECT_EQ(Rows[0].DwCompileUnitIndex, 42u);
+  EXPECT_EQ(Rows[0].RowIndex, 100u);
+}
+
+TEST_F(ClusteredRowsTest, CreateMultipleElements) {
+  ClusteredRows *CR = Container->createClusteredRows(3);
+  ASSERT_NE(CR, nullptr);
+  EXPECT_EQ(CR->size(), 3u);
+
+  // Test population with multiple elements
+  std::vector<DebugLineTableRowRef> TestRefs = {
+      {10, 20},
+      {30, 40},
+      {50, 60}
+  };
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  EXPECT_EQ(Rows.size(), 3u);
+
+  EXPECT_EQ(Rows[0].DwCompileUnitIndex, 10u);
+  EXPECT_EQ(Rows[0].RowIndex, 20u);
+
+  EXPECT_EQ(Rows[1].DwCompileUnitIndex, 30u);
+  EXPECT_EQ(Rows[1].RowIndex, 40u);
+
+  EXPECT_EQ(Rows[2].DwCompileUnitIndex, 50u);
+  EXPECT_EQ(Rows[2].RowIndex, 60u);
+}
+
+TEST_F(ClusteredRowsTest, SMLoc_Conversion) {
+  ClusteredRows *CR = Container->createClusteredRows(2);
+  ASSERT_NE(CR, nullptr);
+
+  // Test SMLoc conversion
+  SMLoc Loc = CR->toSMLoc();
+  EXPECT_TRUE(Loc.isValid());
+
+  // Test round-trip conversion
+  const ClusteredRows *CR2 = ClusteredRows::fromSMLoc(Loc);
+  EXPECT_EQ(CR, CR2);
+  EXPECT_EQ(CR2->size(), 2u);
+}
+
+TEST_F(ClusteredRowsTest, PopulateWithArrayRef) {
+  ClusteredRows *CR = Container->createClusteredRows(4);
+  ASSERT_NE(CR, nullptr);
+
+  // Test population with ArrayRef
+  DebugLineTableRowRef TestArray[] = {
+      {1, 2},
+      {3, 4},
+      {5, 6},
+      {7, 8}
+  };
+  ArrayRef<DebugLineTableRowRef> TestRefs(TestArray, 4);
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  EXPECT_EQ(Rows.size(), 4u);
+
+  for (size_t i = 0; i < 4; ++i) {
+    EXPECT_EQ(Rows[i].DwCompileUnitIndex, TestArray[i].DwCompileUnitIndex);
+    EXPECT_EQ(Rows[i].RowIndex, TestArray[i].RowIndex);
+  }
+}
+
+TEST_F(ClusteredRowsTest, MultipleClusteredRows) {
+  // Test creating multiple ClusteredRows objects
+  ClusteredRows *CR1 = Container->createClusteredRows(2);
+  ClusteredRows *CR2 = Container->createClusteredRows(3);
+  ClusteredRows *CR3 = Container->createClusteredRows(1);
+
+  ASSERT_NE(CR1, nullptr);
+  ASSERT_NE(CR2, nullptr);
+  ASSERT_NE(CR3, nullptr);
+
+  // Ensure they are different objects
+  EXPECT_NE(CR1, CR2);
+  EXPECT_NE(CR2, CR3);
+  EXPECT_NE(CR1, CR3);
+
+  // Verify sizes
+  EXPECT_EQ(CR1->size(), 2u);
+  EXPECT_EQ(CR2->size(), 3u);
+  EXPECT_EQ(CR3->size(), 1u);
+
+  // Populate each with different data
+  std::vector<DebugLineTableRowRef> TestRefs1 = {{100, 200}, {300, 400}};
+  std::vector<DebugLineTableRowRef> TestRefs2 = {{10, 20}, {30, 40}, {50, 60}};
+  std::vector<DebugLineTableRowRef> TestRefs3 = {{999, 888}};
+
+  CR1->populate(TestRefs1);
+  CR2->populate(TestRefs2);
+  CR3->populate(TestRefs3);
+
+  // Verify data integrity
+  ArrayRef<DebugLineTableRowRef> Rows1 = CR1->getRows();
+  ArrayRef<DebugLineTableRowRef> Rows2 = CR2->getRows();
+  ArrayRef<DebugLineTableRowRef> Rows3 = CR3->getRows();
+
+  EXPECT_EQ(Rows1[0].DwCompileUnitIndex, 100u);
+  EXPECT_EQ(Rows1[1].RowIndex, 400u);
+
+  EXPECT_EQ(Rows2[1].DwCompileUnitIndex, 30u);
+  EXPECT_EQ(Rows2[2].RowIndex, 60u);
+
+  EXPECT_EQ(Rows3[0].DwCompileUnitIndex, 999u);
+  EXPECT_EQ(Rows3[0].RowIndex, 888u);
+}
+
+} // namespace

>From abcd69590b944e24605c292ae835596115f9284d Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 29 Jul 2025 14:04:57 -0700
Subject: [PATCH 3/7] Fix lint issues

---
 bolt/lib/Core/BinaryContext.cpp       |  6 ++----
 bolt/unittests/Core/ClusteredRows.cpp | 17 +++--------------
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 1766b0540a5cd..df151f398bd54 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1575,14 +1575,12 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
   StringRef Dir = "";
   if (FileNameEntry.DirIdx != 0) {
     if (std::optional<const char *> DirName = dwarf::toString(
-            LineTable->Prologue
-                .IncludeDirectories[FileNameEntry.DirIdx - 1])) {
+            LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) {
       Dir = *DirName;
     }
   }
   StringRef FileName = "";
-  if (std::optional<const char *> FName =
-          dwarf::toString(FileNameEntry.Name))
+  if (std::optional<const char *> FName = dwarf::toString(FileNameEntry.Name))
     FileName = *FName;
   assert(FileName != "");
   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp
index 5901f9ac5aaaa..a75209a75dfad 100644
--- a/bolt/unittests/Core/ClusteredRows.cpp
+++ b/bolt/unittests/Core/ClusteredRows.cpp
@@ -31,9 +31,7 @@ TEST_F(ClusteredRowsTest, CreateSingleElement) {
   EXPECT_EQ(CR->size(), 1u);
 
   // Test population with single element
-  std::vector<DebugLineTableRowRef> TestRefs = {
-      {42, 100}
-  };
+  std::vector<DebugLineTableRowRef> TestRefs = {{42, 100}};
   CR->populate(TestRefs);
 
   ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
@@ -48,11 +46,7 @@ TEST_F(ClusteredRowsTest, CreateMultipleElements) {
   EXPECT_EQ(CR->size(), 3u);
 
   // Test population with multiple elements
-  std::vector<DebugLineTableRowRef> TestRefs = {
-      {10, 20},
-      {30, 40},
-      {50, 60}
-  };
+  std::vector<DebugLineTableRowRef> TestRefs = {{10, 20}, {30, 40}, {50, 60}};
   CR->populate(TestRefs);
 
   ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
@@ -87,12 +81,7 @@ TEST_F(ClusteredRowsTest, PopulateWithArrayRef) {
   ASSERT_NE(CR, nullptr);
 
   // Test population with ArrayRef
-  DebugLineTableRowRef TestArray[] = {
-      {1, 2},
-      {3, 4},
-      {5, 6},
-      {7, 8}
-  };
+  DebugLineTableRowRef TestArray[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
   ArrayRef<DebugLineTableRowRef> TestRefs(TestArray, 4);
   CR->populate(TestRefs);
 

>From 9391b3a7861ed4495b7ad3308dd7a0d46e1a008f Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 29 Jul 2025 16:28:32 -0700
Subject: [PATCH 4/7] Modify awk script to make it work with older versions

---
 bolt/test/Inputs/process-debug-line.sh | 52 ++++++++++++++------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/Inputs/process-debug-line.sh
index b30408df922eb..44cbcd1e5984a 100755
--- a/bolt/test/Inputs/process-debug-line.sh
+++ b/bolt/test/Inputs/process-debug-line.sh
@@ -23,7 +23,9 @@ BEGIN {
     cu_count = 0
     current_cu_file = ""
     # Initialize file names array
-    for (i = 0; i < 100; i++) current_file_names[i] = ""
+    for (i = 0; i < 100; i++) {
+        current_file_names[i] = ""
+    }
 }
 
 # Track debug_line sections (new CU)
@@ -31,31 +33,34 @@ BEGIN {
     cu_count++
     current_cu_file = ""
     # Clear file names array for new CU
-    for (i = 0; i < 100; i++) current_file_names[i] = ""
+    for (i = 0; i < 100; i++) {
+        current_file_names[i] = ""
+    }
     next
 }
 
 # Capture file names and their indices
 /^file_names\[.*\]:/ {
-    # Extract file index using more portable regex
-    if (match($0, /file_names\[[[:space:]]*([0-9]+)\]:/, arr)) {
-        file_index = arr[1]
-    } else {
-        # Fallback parsing
-        gsub(/file_names\[/, "", $0)
-        gsub(/\]:.*/, "", $0)
-        gsub(/[[:space:]]/, "", $0)
-        file_index = $0
-    }
+    # Extract file index using simple string operations
+    line_copy = $0
+    gsub(/file_names\[/, "", line_copy)
+    gsub(/\]:.*/, "", line_copy)
+    gsub(/[ \t]/, "", line_copy)
+    file_index = line_copy
 
     getline  # Read the next line which contains the actual filename
-    if (match($0, /name:[[:space:]]*"([^"]*)"/, name_arr)) {
-        filename = name_arr[1]
+    # Extract filename from name: "filename" format
+    if (match($0, /name:[ \t]*"/)) {
+        filename = $0
+        gsub(/.*name:[ \t]*"/, "", filename)
+        gsub(/".*/, "", filename)
         current_file_names[file_index] = filename
 
         # Extract basename for main CU file (first .c/.cpp/.cc file we see)
-        if (current_cu_file == "" && match(filename, /([^\/]*\.(c|cpp|cc))$/, cu_arr)) {
-            current_cu_file = cu_arr[1]
+        if (current_cu_file == "" && match(filename, /\.(c|cpp|cc)$/)) {
+            cu_filename = filename
+            gsub(/.*\//, "", cu_filename)
+            current_cu_file = cu_filename
         }
     }
     next
@@ -74,19 +79,18 @@ BEGIN {
         if (filename == "") {
             filename = "UNKNOWN_FILE_" file_index
         } else {
-            # Extract just the basename using portable method
-            if (match(filename, /([^\/]*)$/, basename_arr)) {
-                filename = basename_arr[1]
-            } else {
-                # Fallback: use gsub
-                gsub(/.*\//, "", filename)
-            }
+            # Extract just the basename
+            basename = filename
+            gsub(/.*\//, "", basename)
+            filename = basename
         }
 
         # Build additional info (flags, etc.)
         additional_info = ""
         for (i = 8; i <= NF; i++) {
-            if (additional_info != "") additional_info = additional_info " "
+            if (additional_info != "") {
+                additional_info = additional_info " "
+            }
             additional_info = additional_info $i
         }
 

>From 41edb2b8c492125e89732b58dc3e7fda333bf015 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Tue, 29 Jul 2025 17:22:00 -0700
Subject: [PATCH 5/7] Minor fixes

---
 bolt/include/bolt/Core/BinaryFunction.h | 2 +-
 bolt/lib/Core/BinaryContext.cpp         | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 966559e0c6fa6..ec56ff3e37dd2 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2425,7 +2425,7 @@ class BinaryFunction {
   }
 
   /// Return DWARF compile units for this function.
-  const SmallVector<DWARFUnit *, 1> getDWARFUnits() const {
+  const SmallVector<DWARFUnit *, 1>& getDWARFUnits() const {
     return DwarfUnitVec;
   }
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index df151f398bd54..6cbb17bd4e926 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1718,9 +1718,15 @@ void BinaryContext::preprocessDebugInfo() {
   // Clear debug info for functions from units that we are not going to process.
   for (auto &KV : BinaryFunctions) {
     BinaryFunction &BF = KV.second;
+    // Collect units to remove to avoid iterator invalidation
+    SmallVector<DWARFUnit *, 1> UnitsToRemove;
     for (auto *Unit : BF.getDWARFUnits()) {
       if (!ProcessedCUs.count(Unit))
-        BF.removeDWARFUnit(Unit);
+        UnitsToRemove.push_back(Unit);
+    }
+    // Remove the collected units
+    for (auto *Unit : UnitsToRemove) {
+      BF.removeDWARFUnit(Unit);
     }
   }
 

>From 44bf8bb419cc79a58a7a0aabf5778fcad81de142 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Wed, 30 Jul 2025 19:41:02 -0700
Subject: [PATCH 6/7] Fixed typos

---
 bolt/include/bolt/Core/BinaryContext.h |  2 +-
 bolt/include/bolt/Core/DebugData.h     | 32 ++++++++------------------
 bolt/lib/Core/BinaryFunction.cpp       |  2 +-
 bolt/unittests/Core/ClusteredRows.cpp  |  4 ++--
 4 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 48bc9a5d1f92c..72c8817daa714 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -292,7 +292,7 @@ class BinaryContext {
   /// tables associated with instructions. Since binary functions can span
   /// multiple compilation units, instructions may reference debug line
   /// information from multiple CUs.
-  ClasteredRowsContainer ClasteredRows;
+  ClusteredRowsContainer ClusteredRows;
 
   // Setup MCPlus target builder
   void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 048594946d8a9..adbce0bb1d5b6 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -834,9 +834,6 @@ class DwarfLineTable {
 };
 
 /// ClusteredRows represents a collection of debug line table row references.
-/// Since a Binary function can belong to multiple compilation units (CUs),
-/// a single MCInst can have multiple debug line table rows associated with it
-/// from different CUs. This class manages such clustered row references.
 ///
 /// MEMORY LAYOUT AND DESIGN:
 /// This class uses a flexible array member pattern to store all
@@ -846,7 +843,7 @@ class DwarfLineTable {
 /// +------------------+
 /// | ClusteredRows    |  <- Object header (Size + first element)
 /// | - Size           |
-/// | - Raws (element) |  <- First DebugLineTableRowRef element
+/// | - Rows (element) |  <- First DebugLineTableRowRef element
 /// +------------------+
 /// | element[1]       |  <- Additional DebugLineTableRowRef elements
 /// | element[2]       |     stored immediately after the object
@@ -854,18 +851,7 @@ class DwarfLineTable {
 /// | element[Size-1]  |
 /// +------------------+
 ///
-/// PERFORMANCE BENEFITS:
-/// - Single memory allocation: All elements are stored in one contiguous block,
-///   eliminating the need for separate heap allocations for the array.
-/// - No extra dereferencing: Elements are accessed directly via pointer
-///   arithmetic (beginPtr() + offset) rather than through an additional
-///   pointer indirection.
-/// - Cache locality: All elements are guaranteed to be adjacent in memory,
-///   improving cache performance during iteration.
-/// - Memory efficiency: No overhead from separate pointer storage or
-///   fragmented allocations.
-///
-/// The 'Raws' member serves as both the first element storage and the base
+/// The 'Rows' member serves as both the first element storage and the base
 /// address for pointer arithmetic to access subsequent elements.
 class ClusteredRows {
 public:
@@ -891,7 +877,7 @@ class ClusteredRows {
 
 private:
   uint64_t Size;
-  DebugLineTableRowRef Raws;
+  DebugLineTableRowRef Rows;
 
   ClusteredRows(uint64_t Size) : Size(Size) {}
   static uint64_t getTotalSize(uint64_t Size) {
@@ -899,17 +885,17 @@ class ClusteredRows {
     return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef);
   }
   const DebugLineTableRowRef *beginPtrConst() const {
-    return reinterpret_cast<const DebugLineTableRowRef *>(&Raws);
+    return reinterpret_cast<const DebugLineTableRowRef *>(&Rows);
   }
   DebugLineTableRowRef *beginPtr() {
-    return reinterpret_cast<DebugLineTableRowRef *>(&Raws);
+    return reinterpret_cast<DebugLineTableRowRef *>(&Rows);
   }
 
-  friend class ClasteredRowsContainer;
+  friend class ClusteredRowsContainer;
 };
 
-/// ClasteredRowsContainer manages the lifecycle of ClusteredRows objects.
-class ClasteredRowsContainer {
+/// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects.
+class ClusteredRowsContainer {
 public:
   ClusteredRows *createClusteredRows(uint64_t Size) {
     auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size)))
@@ -917,7 +903,7 @@ class ClasteredRowsContainer {
     Clusters.push_back(CR);
     return CR;
   }
-  ~ClasteredRowsContainer() {
+  ~ClusteredRowsContainer() {
     for (auto *CR : Clusters)
       std::free(CR);
   }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a3a6b31451441..8635cba006991 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1502,7 +1502,7 @@ Error BinaryFunction::disassemble() {
       }
       if (!Rows.empty()) {
         ClusteredRows *Cluster =
-            BC.ClasteredRows.createClusteredRows(Rows.size());
+            BC.ClusteredRows.createClusteredRows(Rows.size());
         Cluster->populate(Rows);
         Instruction.setLoc(Cluster->toSMLoc());
       }
diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp
index a75209a75dfad..4665022c91fdd 100644
--- a/bolt/unittests/Core/ClusteredRows.cpp
+++ b/bolt/unittests/Core/ClusteredRows.cpp
@@ -19,10 +19,10 @@ namespace {
 class ClusteredRowsTest : public ::testing::Test {
 protected:
   void SetUp() override {
-    Container = std::make_unique<ClasteredRowsContainer>();
+    Container = std::make_unique<ClusteredRowsContainer>();
   }
 
-  std::unique_ptr<ClasteredRowsContainer> Container;
+  std::unique_ptr<ClusteredRowsContainer> Container;
 };
 
 TEST_F(ClusteredRowsTest, CreateSingleElement) {

>From 04663a0cf062005b1f6abae2d1e0078396e36863 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <gpastukhov at meta.com>
Date: Thu, 31 Jul 2025 10:45:31 -0700
Subject: [PATCH 7/7] Made the test architecture independent

---
 bolt/test/lit.cfg.py                               |  4 ++--
 bolt/test/{X86 => }/multi-cu-debug-line.test       | 14 +++++++-------
 .../process-debug-line.sh => process-debug-line}   |  0
 3 files changed, 9 insertions(+), 9 deletions(-)
 rename bolt/test/{X86 => }/multi-cu-debug-line.test (93%)
 rename bolt/test/{Inputs/process-debug-line.sh => process-debug-line} (100%)

diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 0d05229be2bf3..508db8b890190 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -10,8 +10,7 @@
 import lit.util
 
 from lit.llvm import llvm_config
-from lit.llvm.subst import ToolSubst
-from lit.llvm.subst import FindTool
+from lit.llvm.subst import FindTool, ToolSubst
 
 # Configuration file for the 'lit' test runner.
 
@@ -127,6 +126,7 @@
         unresolved="fatal",
         extra_args=[link_fdata_cmd],
     ),
+    ToolSubst("process-debug-line", unresolved="fatal"),
     ToolSubst("merge-fdata", unresolved="fatal"),
     ToolSubst("llvm-readobj", unresolved="fatal"),
     ToolSubst("llvm-dwp", unresolved="fatal"),
diff --git a/bolt/test/X86/multi-cu-debug-line.test b/bolt/test/multi-cu-debug-line.test
similarity index 93%
rename from bolt/test/X86/multi-cu-debug-line.test
rename to bolt/test/multi-cu-debug-line.test
index 430b281445db5..a94c901bbcc5a 100644
--- a/bolt/test/X86/multi-cu-debug-line.test
+++ b/bolt/test/multi-cu-debug-line.test
@@ -1,16 +1,16 @@
 ## Test that BOLT correctly handles debug line information for functions
 ## that belong to multiple compilation units (e.g., inline functions in
 ## common header files). The test covers two scenarios:
-## 1. Normal processing: .debug_line section shows lines for the function 
+## 1. Normal processing: .debug_line section shows lines for the function
 ##    in all CUs where it was compiled, with no duplicate rows within CUs
-## 2. Functions not processed: When BOLT doesn't process functions (using 
+## 2. Functions not processed: When BOLT doesn't process functions (using
 ##    --funcs with nonexistent function), original debug info is preserved
 
 # REQUIRES: system-linux
 
 ## Compile test files with debug info
-# RUN: %clang %cflags -O0 -g %S/../Inputs/multi-cu-file1.c %S/../Inputs/multi-cu-file2.c \
-# RUN:   -I%S/../Inputs -o %t.exe -Wl,-q
+# RUN: %clang %cflags -O0 -g %S/Inputs/multi-cu-file1.c %S/Inputs/multi-cu-file2.c \
+# RUN:   -I%S/Inputs -o %t.exe -Wl,-q
 
 ## Test 1: Normal BOLT processing (functions are processed/optimized)
 # RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
@@ -26,7 +26,7 @@
 # BASIC: name: "{{.*}}multi-cu-file2.c"
 
 ## Use our helper script to create a normalized table without addresses
-# RUN: %S/../Inputs/process-debug-line.sh %t.debug-line.txt > %t.normalized-debug-line.txt
+# RUN: process-debug-line %t.debug-line.txt > %t.normalized-debug-line.txt
 # RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt
 
 ## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h
@@ -72,7 +72,7 @@
 # PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c"
 
 ## Create normalized output for the not-emitted case
-# RUN: %S/../Inputs/process-debug-line.sh %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt
+# RUN: process-debug-line %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt
 # RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt
 
 ## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h
@@ -105,4 +105,4 @@
 # PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c
 
 ## Note: We do not check for duplicates in Test 2 since we are preserving original debug info as-is
-## and the original may contain patterns that would be flagged as duplicates by our normalization
\ No newline at end of file
+## and the original may contain patterns that would be flagged as duplicates by our normalization
diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/process-debug-line
similarity index 100%
rename from bolt/test/Inputs/process-debug-line.sh
rename to bolt/test/process-debug-line