[llvm] 65f6373 - [dsymutil] Skip duplicates files with identical time stamps in the debug map

Jonas Devlieghere via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 12 10:01:18 PDT 2023


Author: Jonas Devlieghere
Date: 2023-06-12T10:01:12-07:00
New Revision: 65f63739805e2c884364cb5b4b77430f96ecee7c

URL: https://github.com/llvm/llvm-project/commit/65f63739805e2c884364cb5b4b77430f96ecee7c
DIFF: https://github.com/llvm/llvm-project/commit/65f63739805e2c884364cb5b4b77430f96ecee7c.diff

LOG: [dsymutil] Skip duplicates files with identical time stamps in the debug map

Static archives can contain multiple files with the same file name, in
which case the timestamp is used to disambiguate. Because timestamps are
expressed in seconds since epoch timestamp collisions are far from
impossible. Furthermore, to facilitate reproducible builds, the static
linker can be told to emit no timestamps at all.

dsymutil already detects timestamp mismatches between the debug map and
the object files. However, it does not handle timestamp collisions
within the debug maps (STABS). Currently, we arbitrarily pick the first
debug map entry and ignore the rest. This is incorrect: if a symbol
exists in multiple object files, the linker might not have picked the
one from the first object file. This also results in missing symbol
warnings for all the symbols not defined in the first object file.

Given that in this scenario, dsymutil does not have enough information
to disambiguate, it should print a single informative warning and skip
the ambiguous debug map objects.

rdar://110374836

Differential revision: https://reviews.llvm.org/D152585

Added: 
    llvm/test/tools/dsymutil/ARM/static-archive-collision.test
    llvm/test/tools/dsymutil/Inputs/private/tmp/collision/foo.a
    llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.o
    llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.out

Modified: 
    llvm/tools/dsymutil/MachODebugMapParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/dsymutil/ARM/static-archive-collision.test b/llvm/test/tools/dsymutil/ARM/static-archive-collision.test
new file mode 100644
index 0000000000000..fb87ce827e1bd
--- /dev/null
+++ b/llvm/test/tools/dsymutil/ARM/static-archive-collision.test
@@ -0,0 +1,26 @@
+$ cat f.c
+int f() {
+  volatile int i;
+  return i;
+}
+$ cat g.c
+int g() {
+  volatile int i;
+  return i;
+}
+$ cat main.c
+int f();
+int g();
+
+int main() {
+  return f() + g();
+}
+$ clang -g f.c -c -o f/foo.o
+$ clang -g g.c -c -o g/foo.o
+$ libtool -static f/foo.o g/foo.o -o foo.a
+$ clang main.o foo.a -o main.out
+
+RUN: dsymutil -oso-prepend-path %p/../Inputs %p/../Inputs/private/tmp/collision/main.out --dump-debug-map 2>&1 | FileCheck %s
+CHECK: skipping debug map object with duplicate name and timestamp: 1969-12-31 16:00:00.000000000 /private/tmp/collision/foo.a(foo.o)
+CHECK-NOT: could not find object file symbol for symbol _g
+CHECK-NOT: could not find object file symbol for symbol _f

diff  --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/foo.a b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/foo.a
new file mode 100644
index 0000000000000..fc93fb9da390a
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/foo.a 
diff er

diff  --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.o b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.o
new file mode 100644
index 0000000000000..da7ebda21a149
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.o 
diff er

diff  --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.out
new file mode 100755
index 0000000000000..18e1fba4fe296
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.out 
diff er

diff  --git a/llvm/tools/dsymutil/MachODebugMapParser.cpp b/llvm/tools/dsymutil/MachODebugMapParser.cpp
index 305a0556d64cf..d3bda338a9b32 100644
--- a/llvm/tools/dsymutil/MachODebugMapParser.cpp
+++ b/llvm/tools/dsymutil/MachODebugMapParser.cpp
@@ -9,8 +9,10 @@
 #include "BinaryHolder.h"
 #include "DebugMap.h"
 #include "MachOUtils.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Support/Chrono.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
@@ -31,7 +33,7 @@ class MachODebugMapParser {
       : BinaryPath(std::string(BinaryPath)), Archs(Archs.begin(), Archs.end()),
         PathPrefix(std::string(PathPrefix)),
         PaperTrailWarnings(PaperTrailWarnings), BinHolder(VFS, Verbose),
-        CurrentDebugMapObject(nullptr) {}
+        CurrentDebugMapObject(nullptr), SkipDebugMapObject(false) {}
 
   /// Parses and returns the DebugMaps of the input binary. The binary contains
   /// multiple maps in case it is a universal binary.
@@ -42,6 +44,8 @@ class MachODebugMapParser {
   /// Walk the symbol table and dump it.
   bool dumpStab();
 
+  using OSO = std::pair<llvm::StringRef, uint64_t>;
+
 private:
   std::string BinaryPath;
   SmallVector<StringRef, 1> Archs;
@@ -70,12 +74,18 @@ class MachODebugMapParser {
   /// Element of the debug map corresponding to the current object file.
   DebugMapObject *CurrentDebugMapObject;
 
+  /// Whether we need to skip the current debug map object.
+  bool SkipDebugMapObject;
+
   /// Holds function info while function scope processing.
   const char *CurrentFunctionName;
   uint64_t CurrentFunctionAddress;
 
   std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary,
                                            StringRef BinaryPath);
+  void handleStabDebugMap(
+      const MachOObjectFile &MainBinary,
+      std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F);
 
   void
   switchToNewDebugMapObject(StringRef Filename,
@@ -85,13 +95,21 @@ class MachODebugMapParser {
   std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value);
   void loadMainBinarySymbols(const MachOObjectFile &MainBinary);
   void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj);
+
+  void handleStabOSOEntry(uint32_t StringIndex, uint8_t Type,
+                          uint8_t SectionIndex, uint16_t Flags, uint64_t Value,
+                          llvm::DenseSet<OSO> &OSOs,
+                          llvm::SmallSet<OSO, 4> &Duplicates);
   void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type,
                                   uint8_t SectionIndex, uint16_t Flags,
-                                  uint64_t Value);
+                                  uint64_t Value,
+                                  const llvm::SmallSet<OSO, 4> &Duplicates);
 
-  template <typename STEType> void handleStabDebugMapEntry(const STEType &STE) {
-    handleStabSymbolTableEntry(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc,
-                               STE.n_value);
+  template <typename STEType>
+  void handleStabDebugMapEntry(
+      const STEType &STE,
+      std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) {
+    F(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, STE.n_value);
   }
 
   void addCommonSymbols();
@@ -140,6 +158,7 @@ void MachODebugMapParser::resetParserState() {
   CurrentObjectAliasMap.clear();
   SeenAliasValues.clear();
   CurrentDebugMapObject = nullptr;
+  SkipDebugMapObject = false;
 }
 
 /// Commons symbols won't show up in the symbol map but might need to be
@@ -199,6 +218,18 @@ static std::string getArchName(const object::MachOObjectFile &Obj) {
   return std::string(T.getArchName());
 }
 
+void MachODebugMapParser::handleStabDebugMap(
+    const MachOObjectFile &MainBinary,
+    std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) {
+  for (const SymbolRef &Symbol : MainBinary.symbols()) {
+    const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
+    if (MainBinary.is64Bit())
+      handleStabDebugMapEntry(MainBinary.getSymbol64TableEntry(DRI), F);
+    else
+      handleStabDebugMapEntry(MainBinary.getSymbolTableEntry(DRI), F);
+  }
+}
+
 std::unique_ptr<DebugMap>
 MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary,
                                     StringRef BinaryPath) {
@@ -206,14 +237,41 @@ MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary,
                                       MainBinary.getUuid());
   loadMainBinarySymbols(MainBinary);
   MainBinaryStrings = MainBinary.getStringTableData();
-  for (const SymbolRef &Symbol : MainBinary.symbols()) {
-    const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
-    if (MainBinary.is64Bit())
-      handleStabDebugMapEntry(MainBinary.getSymbol64TableEntry(DRI));
-    else
-      handleStabDebugMapEntry(MainBinary.getSymbolTableEntry(DRI));
+
+  // Static archives can contain multiple object files with identical names, in
+  // which case the timestamp is used to disambiguate. However, if both are
+  // identical, there's no way to tell them apart. Detect this and skip
+  // duplicate debug map objects.
+  llvm::DenseSet<OSO> OSOs;
+  llvm::SmallSet<OSO, 4> Duplicates;
+
+  // Iterate over all the STABS to find duplicate OSO entries.
+  handleStabDebugMap(MainBinary,
+                     [&](uint32_t StringIndex, uint8_t Type,
+                         uint8_t SectionIndex, uint16_t Flags, uint64_t Value) {
+                       handleStabOSOEntry(StringIndex, Type, SectionIndex,
+                                          Flags, Value, OSOs, Duplicates);
+                     });
+
+  // Print an informative warning with the duplicate object file name and time
+  // stamp.
+  for (const auto &OSO : Duplicates) {
+    std::string Buffer;
+    llvm::raw_string_ostream OS(Buffer);
+    OS << sys::TimePoint<std::chrono::seconds>(sys::toTimePoint(OSO.second));
+    Warning("skipping debug map object with duplicate name and timestamp: " +
+            OS.str() + Twine(" ") + Twine(OSO.first));
   }
 
+  // Build the debug map by iterating over the STABS again but ignore the
+  // duplicate debug objects.
+  handleStabDebugMap(MainBinary, [&](uint32_t StringIndex, uint8_t Type,
+                                     uint8_t SectionIndex, uint16_t Flags,
+                                     uint64_t Value) {
+    handleStabSymbolTableEntry(StringIndex, Type, SectionIndex, Flags, Value,
+                               Duplicates);
+  });
+
   resetParserState();
   return std::move(Result);
 }
@@ -408,20 +466,38 @@ ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() {
   return std::move(Results);
 }
 
+void MachODebugMapParser::handleStabOSOEntry(
+    uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
+    uint64_t Value, llvm::DenseSet<OSO> &OSOs,
+    llvm::SmallSet<OSO, 4> &Duplicates) {
+  if (Type != MachO::N_OSO)
+    return;
+
+  OSO O(&MainBinaryStrings.data()[StringIndex], Value);
+  if (!OSOs.insert(O).second)
+    Duplicates.insert(O);
+}
+
 /// Interpret the STAB entries to fill the DebugMap.
-void MachODebugMapParser::handleStabSymbolTableEntry(uint32_t StringIndex,
-                                                     uint8_t Type,
-                                                     uint8_t SectionIndex,
-                                                     uint16_t Flags,
-                                                     uint64_t Value) {
+void MachODebugMapParser::handleStabSymbolTableEntry(
+    uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags,
+    uint64_t Value, const llvm::SmallSet<OSO, 4> &Duplicates) {
   if (!(Type & MachO::N_STAB))
     return;
 
   const char *Name = &MainBinaryStrings.data()[StringIndex];
 
   // An N_OSO entry represents the start of a new object file description.
-  if (Type == MachO::N_OSO)
+  if (Type == MachO::N_OSO) {
+    if (Duplicates.count(OSO(Name, Value))) {
+      SkipDebugMapObject = true;
+      return;
+    }
     return switchToNewDebugMapObject(Name, sys::toTimePoint(Value));
+  }
+
+  if (SkipDebugMapObject)
+    return;
 
   if (Type == MachO::N_AST) {
     SmallString<80> Path(PathPrefix);


        


More information about the llvm-commits mailing list