[llvm] [MC][DWARF] Consider non-native path separators when building DWARF directory-tables (PR #115888)

Jeremy Morse via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 08:02:13 PST 2024


https://github.com/jmorse created https://github.com/llvm/llvm-project/pull/115888

This is a patch that allows a triple (or flag) to override the native path separators used when constructing the DWARF linetable directory-entries. This means that if you feed LLVM-IR with Windows path endings in the debug-info metadata into llc on Linux, you'll get an identical output. Otherwise, the difference in path separators means a different set of directories is produced (see test).

The overall rational for this patch is identical-binary-checking of objects produced on Linux versus Windows. The PlayStation build environment is fundamentally a Windows environment, but we often do testing under a Linux environment because it's easier. We feel a lot more confident about this when we produce identical binaries whether building on Windows or Linux, and are able to test linux-built binaries in Windows environments with as few differences as possible. Hence this patch!

There's precedent in the clang frontend with the -ffile-reproducible flag and a few other switches, which I believe helps Chromium generate identical binaries between Linux/Windows, although not for debug-info sections.

>From 0609233ca33ccb84cd3722ef4b68699c3ea9cb9a Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 12 Nov 2024 14:58:40 +0000
Subject: [PATCH 1/2] [MC][DebugInfo] Consider the target when deciding what's
 a path separator

This patch teaches MCContext and DWARF linetable headers about path
separators, or at least lets them store them, so that we can switch out
the path separator at runtime. The use-case for this is targets where the
development environment isn't the same as the compilers native environment,
which can lead to differences in the computed DWARF directory tables in
.debug_line.
---
 llvm/include/llvm/MC/MCContext.h              |  6 +-
 llvm/include/llvm/MC/MCDwarf.h                | 15 +++-
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    |  3 +-
 llvm/lib/MC/MCContext.cpp                     |  9 ++-
 llvm/lib/MC/MCDwarf.cpp                       | 12 +--
 .../DebugInfo/dir-table-path-separators.ll    | 73 +++++++++++++++++++
 6 files changed, 104 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/DebugInfo/dir-table-path-separators.ll

diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 57ba40f7ac26fc..3fc241b066b350 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -335,6 +335,8 @@ class MCContext {
 
   MCTargetOptions const *TargetOptions;
 
+  llvm::sys::path::Style PathStyle = llvm::sys::path::Style::native;
+
   bool HadError = false;
 
   void reportCommon(SMLoc Loc,
@@ -721,6 +723,8 @@ class MCContext {
 
   void setDwarfCompileUnitID(unsigned CUIndex) { DwarfCompileUnitID = CUIndex; }
 
+  llvm::sys::path::Style getPathStyle() const { return PathStyle; }
+
   /// Specifies the "root" file and directory of the compilation unit.
   /// These are "file 0" and "directory 0" in DWARF v5.
   void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir,
@@ -728,7 +732,7 @@ class MCContext {
                               std::optional<MD5::MD5Result> Checksum,
                               std::optional<StringRef> Source) {
     getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum,
-                                          Source);
+                                          Source, PathStyle);
   }
 
   /// Reports whether MD5 checksum usage is consistent (all-or-none).
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 1392336968e74a..58e93d790305cb 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -21,6 +21,7 @@
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/StringSaver.h"
 #include <cassert>
@@ -277,6 +278,7 @@ struct MCDwarfLineTableHeader {
   StringMap<unsigned> SourceIdMap;
   std::string CompilationDir;
   MCDwarfFile RootFile;
+  llvm::sys::path::Style PathStyle = llvm::sys::path::Style::native;
   bool HasAnySource = false;
 
 private:
@@ -311,7 +313,9 @@ struct MCDwarfLineTableHeader {
 
   void setRootFile(StringRef Directory, StringRef FileName,
                    std::optional<MD5::MD5Result> Checksum,
-                   std::optional<StringRef> Source) {
+                   std::optional<StringRef> Source,
+                   llvm::sys::path::Style S = llvm::sys::path::Style::native) {
+    PathStyle = S;
     CompilationDir = std::string(Directory);
     RootFile.Name = std::string(FileName);
     RootFile.DirIndex = 0;
@@ -342,10 +346,11 @@ class MCDwarfDwoLineTable {
 public:
   void maybeSetRootFile(StringRef Directory, StringRef FileName,
                         std::optional<MD5::MD5Result> Checksum,
-                        std::optional<StringRef> Source) {
+                        std::optional<StringRef> Source,
+                        llvm::sys::path::Style S) {
     if (!Header.RootFile.Name.empty())
       return;
-    Header.setRootFile(Directory, FileName, Checksum, Source);
+    Header.setRootFile(Directory, FileName, Checksum, Source, S);
   }
 
   unsigned getFile(StringRef Directory, StringRef FileName,
@@ -394,7 +399,9 @@ class MCDwarfLineTable {
 
   void setRootFile(StringRef Directory, StringRef FileName,
                    std::optional<MD5::MD5Result> Checksum,
-                   std::optional<StringRef> Source) {
+                   std::optional<StringRef> Source,
+                   llvm::sys::path::Style S) {
+    Header.PathStyle = S;
     Header.CompilationDir = std::string(Directory);
     Header.RootFile.Name = std::string(FileName);
     Header.RootFile.DirIndex = 0;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 88ed3f5dc7b4b4..3c3aaffeaba3dc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3429,7 +3429,8 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
   const DICompileUnit *DIUnit = CU.getCUNode();
   SplitTypeUnitFileTable.maybeSetRootFile(
       DIUnit->getDirectory(), DIUnit->getFilename(),
-      getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
+      getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
+      Asm->OutStreamer->getContext().getPathStyle());
   return &SplitTypeUnitFileTable;
 }
 
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index b97f9d9f5fed0f..a859b0d3b22317 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -77,6 +77,9 @@ MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
   SaveTempLabels = TargetOptions && TargetOptions->MCSaveTempLabels;
   SecureLogFile = TargetOptions ? TargetOptions->AsSecureLogFile : "";
 
+  if (TheTriple.isPS())
+    PathStyle = llvm::sys::path::Style::windows;
+
   if (SrcMgr && SrcMgr->getNumBuffers())
     MainFileName = std::string(SrcMgr->getMemoryBuffer(SrcMgr->getMainFileID())
                                    ->getBufferIdentifier());
@@ -970,12 +973,12 @@ void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) {
   if (FileNameBuf.empty() || FileNameBuf == "-")
     FileNameBuf = "<stdin>";
   if (!getMainFileName().empty() && FileNameBuf != getMainFileName()) {
-    llvm::sys::path::remove_filename(FileNameBuf);
-    llvm::sys::path::append(FileNameBuf, getMainFileName());
+    llvm::sys::path::remove_filename(FileNameBuf, PathStyle);
+    llvm::sys::path::append(FileNameBuf, PathStyle, getMainFileName());
   }
   StringRef FileName = FileNameBuf;
   if (FileName.consume_front(getCompilationDir()))
-    if (llvm::sys::path::is_separator(FileName.front()))
+    if (llvm::sys::path::is_separator(FileName.front(), PathStyle))
       FileName = FileName.drop_front();
   assert(!FileName.empty());
   setMCLineTableRootFile(
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index e058358fb8ad4b..3df95d8a0f196c 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -664,9 +664,9 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, StringRef &FileName,
 
   if (Directory.empty()) {
     // Separate the directory part from the basename of the FileName.
-    StringRef tFileName = sys::path::filename(FileName);
+    StringRef tFileName = sys::path::filename(FileName, PathStyle);
     if (!tFileName.empty()) {
-      Directory = sys::path::parent_path(FileName);
+      Directory = sys::path::parent_path(FileName, PathStyle);
       if (!Directory.empty())
         FileName = tFileName;
     }
@@ -939,7 +939,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
 static void EmitGenDwarfInfo(MCStreamer *MCOS,
                              const MCSymbol *AbbrevSectionSymbol,
                              const MCSymbol *LineSectionSymbol,
-                             const MCSymbol *RangesSymbol) {
+                             const MCSymbol *RangesSymbol,
+                             llvm::sys::path::Style Style) {
   MCContext &context = MCOS->getContext();
 
   MCOS->switchSection(context.getObjectFileInfo()->getDwarfInfoSection());
@@ -1037,7 +1038,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
   const SmallVectorImpl<std::string> &MCDwarfDirs = context.getMCDwarfDirs();
   if (MCDwarfDirs.size() > 0) {
     MCOS->emitBytes(MCDwarfDirs[0]);
-    MCOS->emitBytes(sys::path::get_separator());
+    MCOS->emitBytes(sys::path::get_separator(Style));
   }
   const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles = context.getMCDwarfFiles();
   // MCDwarfFiles might be empty if we have an empty source file.
@@ -1225,7 +1226,8 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
   EmitGenDwarfAbbrev(MCOS);
 
   // Output the data for .debug_info section.
-  EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, RangesSymbol);
+  EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, RangesSymbol,
+                   context.getPathStyle());
 }
 
 //
diff --git a/llvm/test/DebugInfo/dir-table-path-separators.ll b/llvm/test/DebugInfo/dir-table-path-separators.ll
new file mode 100644
index 00000000000000..90240d101c0f80
--- /dev/null
+++ b/llvm/test/DebugInfo/dir-table-path-separators.ll
@@ -0,0 +1,73 @@
+; RUN: llc %s -o - -filetype=obj -mtriple x86_64-pc-linux-gnu | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=LINUX
+; RUN: llc %s -o - -filetype=obj -mtriple x86_64-sie-ps5 | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=PS5
+;
+; UNSUPPORTED: system-windows
+;
+; Check that the DWARF-printing MC backend is willing to consider Windows '\'
+; characters as path separators so that it can build the directory index table.
+; On Linux, the Windows path separators below would been seen as part of the
+; filename, and so wouldn't be combined into a directory entry. Wheras on
+; Windows (or a target masquerading as Windows) they should be combined into a
+; "foo\bar" directory.
+;
+; LINUX: include_directories[  0] = "C:\\foobar"
+; LINUX: file_names[  0]:
+; LINUX:           name: "foo\\bar\\test.cpp"
+; LINUX:      dir_index: 0
+; LINUX: file_names[  1]:
+; LINUX:           name: "foo\\bar\\bar.cpp"
+; LINUX:      dir_index: 0
+; LINUX: file_names[  2]:
+; LINUX:           name: "foo\\bar\\baz.cpp"
+; LINUX:      dir_index: 0
+;
+; PS5:      include_directories[  0] = "C:\\foobar"
+; PS5-NEXT: include_directories[  1] = "foo\\bar"
+; PS5-NEXT: file_names[  0]:
+; PS5-NEXT:           name: "foo\\bar\\test.cpp"
+; PS5-NEXT:      dir_index: 0
+; PS5-NEXT: file_names[  1]:
+; PS5-NEXT:           name: "bar.cpp"
+; PS5-NEXT:      dir_index: 1
+; PS5-NEXT: file_names[  2]:
+; PS5-NEXT:           name: "baz.cpp"
+; PS5-NEXT:      dir_index: 1
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+define dso_local noundef i32 @_Z3foov() local_unnamed_addr !dbg !9 {
+  ret i32 0, !dbg !14
+}
+
+define dso_local noundef i32 @_Z3barv() local_unnamed_addr !dbg !15 {
+  ret i32 0, !dbg !17
+}
+
+define dso_local noundef i32 @main() local_unnamed_addr !dbg !18 {
+  ret i32 0, !dbg !19
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "foo\\bar\\test.cpp", directory: "C:\\foobar")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{!"clang"}
+!9 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !10, file: !10, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!10 = !DIFile(filename: "foo\\bar\\bar.cpp", directory: "C:\\foobar")
+!11 = !DISubroutineType(types: !12)
+!12 = !{!13}
+!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!14 = !DILocation(line: 2, column: 3, scope: !9)
+!15 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !16, file: !16, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!16 = !DIFile(filename: "foo\\bar\\baz.cpp", directory: "C:\\foobar")
+!17 = !DILocation(line: 2, column: 3, scope: !15)
+!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !11, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!19 = !DILocation(line: 4, column: 3, scope: !18)

>From bfdc8d60e2ce18b8d254556c1e68c6444efb0196 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 12 Nov 2024 15:44:37 +0000
Subject: [PATCH 2/2] Add hidden switch for testing purposes

---
 llvm/lib/MC/MCContext.cpp                        | 6 +++++-
 llvm/test/DebugInfo/dir-table-path-separators.ll | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index a859b0d3b22317..e1e3f9ddfeff7b 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -59,6 +59,10 @@
 
 using namespace llvm;
 
+cl::opt<bool> ForceDWARFWindowsPathSeps ("force-dwarf-windows-path-seps",
+   cl::desc("Use Windows path separators when building DWARF linetables"),
+   cl::Hidden);
+
 static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
                                std::vector<const MDNode *> &) {
   SMD.print(nullptr, errs());
@@ -77,7 +81,7 @@ MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
   SaveTempLabels = TargetOptions && TargetOptions->MCSaveTempLabels;
   SecureLogFile = TargetOptions ? TargetOptions->AsSecureLogFile : "";
 
-  if (TheTriple.isPS())
+  if (ForceDWARFWindowsPathSeps || TheTriple.isPS())
     PathStyle = llvm::sys::path::Style::windows;
 
   if (SrcMgr && SrcMgr->getNumBuffers())
diff --git a/llvm/test/DebugInfo/dir-table-path-separators.ll b/llvm/test/DebugInfo/dir-table-path-separators.ll
index 90240d101c0f80..a9c8e039af55f0 100644
--- a/llvm/test/DebugInfo/dir-table-path-separators.ll
+++ b/llvm/test/DebugInfo/dir-table-path-separators.ll
@@ -1,4 +1,5 @@
 ; RUN: llc %s -o - -filetype=obj -mtriple x86_64-pc-linux-gnu | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=LINUX
+; RUN: llc %s -o - -filetype=obj -mtriple x86_64-pc-linux-gnu -force-dwarf-windows-path-seps=true | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=PS5
 ; RUN: llc %s -o - -filetype=obj -mtriple x86_64-sie-ps5 | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=PS5
 ;
 ; UNSUPPORTED: system-windows



More information about the llvm-commits mailing list