[llvm] [MC][DWARF] Consider non-native path separators when building DWARF directory-tables (PR #115888)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 08:02:51 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-debuginfo

Author: Jeremy Morse (jmorse)

<details>
<summary>Changes</summary>

This is a patch that allows a triple (or flag) to override the native path separators used when constructing the DWARF line-table directory entries. This means that if you feed LLVM IR with Windows path separators in the debug-info metadata into llc on Linux, you'll get output identical to what llc produces on Windows. Otherwise, the difference in path separators means a different set of directories is produced (see test).

The overall rationale for this patch is identical-binary-checking of objects produced on Linux versus Windows. The PlayStation build environment is fundamentally a Windows environment, but we often do testing under a Linux environment because it's easier. We feel a lot more confident about this when we produce identical binaries whether building on Windows or Linux, and are able to test Linux-built binaries in Windows environments with as few differences as possible. Hence this patch!

There's precedent in the clang frontend with the -ffile-reproducible flag and a few other switches, which I believe helps Chromium generate identical binaries between Linux/Windows, although not for debug-info sections.

---
Full diff: https://github.com/llvm/llvm-project/pull/115888.diff


6 Files Affected:

- (modified) llvm/include/llvm/MC/MCContext.h (+5-1) 
- (modified) llvm/include/llvm/MC/MCDwarf.h (+11-4) 
- (modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (+2-1) 
- (modified) llvm/lib/MC/MCContext.cpp (+10-3) 
- (modified) llvm/lib/MC/MCDwarf.cpp (+7-5) 
- (added) llvm/test/DebugInfo/dir-table-path-separators.ll (+74) 


``````````diff
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 57ba40f7ac26fc..3fc241b066b350 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -335,6 +335,8 @@ class MCContext {
 
   MCTargetOptions const *TargetOptions;
 
+  llvm::sys::path::Style PathStyle = llvm::sys::path::Style::native;
+
   bool HadError = false;
 
   void reportCommon(SMLoc Loc,
@@ -721,6 +723,8 @@ class MCContext {
 
   void setDwarfCompileUnitID(unsigned CUIndex) { DwarfCompileUnitID = CUIndex; }
 
+  llvm::sys::path::Style getPathStyle() const { return PathStyle; }
+
   /// Specifies the "root" file and directory of the compilation unit.
   /// These are "file 0" and "directory 0" in DWARF v5.
   void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir,
@@ -728,7 +732,7 @@ class MCContext {
                               std::optional<MD5::MD5Result> Checksum,
                               std::optional<StringRef> Source) {
     getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum,
-                                          Source);
+                                          Source, PathStyle);
   }
 
   /// Reports whether MD5 checksum usage is consistent (all-or-none).
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 1392336968e74a..58e93d790305cb 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -21,6 +21,7 @@
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/StringSaver.h"
 #include <cassert>
@@ -277,6 +278,7 @@ struct MCDwarfLineTableHeader {
   StringMap<unsigned> SourceIdMap;
   std::string CompilationDir;
   MCDwarfFile RootFile;
+  llvm::sys::path::Style PathStyle = llvm::sys::path::Style::native;
   bool HasAnySource = false;
 
 private:
@@ -311,7 +313,9 @@ struct MCDwarfLineTableHeader {
 
   void setRootFile(StringRef Directory, StringRef FileName,
                    std::optional<MD5::MD5Result> Checksum,
-                   std::optional<StringRef> Source) {
+                   std::optional<StringRef> Source,
+                   llvm::sys::path::Style S = llvm::sys::path::Style::native) {
+    PathStyle = S;
     CompilationDir = std::string(Directory);
     RootFile.Name = std::string(FileName);
     RootFile.DirIndex = 0;
@@ -342,10 +346,11 @@ class MCDwarfDwoLineTable {
 public:
   void maybeSetRootFile(StringRef Directory, StringRef FileName,
                         std::optional<MD5::MD5Result> Checksum,
-                        std::optional<StringRef> Source) {
+                        std::optional<StringRef> Source,
+                        llvm::sys::path::Style S) {
     if (!Header.RootFile.Name.empty())
       return;
-    Header.setRootFile(Directory, FileName, Checksum, Source);
+    Header.setRootFile(Directory, FileName, Checksum, Source, S);
   }
 
   unsigned getFile(StringRef Directory, StringRef FileName,
@@ -394,7 +399,9 @@ class MCDwarfLineTable {
 
   void setRootFile(StringRef Directory, StringRef FileName,
                    std::optional<MD5::MD5Result> Checksum,
-                   std::optional<StringRef> Source) {
+                   std::optional<StringRef> Source,
+                   llvm::sys::path::Style S) {
+    Header.PathStyle = S;
     Header.CompilationDir = std::string(Directory);
     Header.RootFile.Name = std::string(FileName);
     Header.RootFile.DirIndex = 0;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 88ed3f5dc7b4b4..3c3aaffeaba3dc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3429,7 +3429,8 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
   const DICompileUnit *DIUnit = CU.getCUNode();
   SplitTypeUnitFileTable.maybeSetRootFile(
       DIUnit->getDirectory(), DIUnit->getFilename(),
-      getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
+      getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
+      Asm->OutStreamer->getContext().getPathStyle());
   return &SplitTypeUnitFileTable;
 }
 
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index b97f9d9f5fed0f..e1e3f9ddfeff7b 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -59,6 +59,10 @@
 
 using namespace llvm;
 
+cl::opt<bool> ForceDWARFWindowsPathSeps ("force-dwarf-windows-path-seps",
+   cl::desc("Use Windows path separators when building DWARF linetables"),
+   cl::Hidden);
+
 static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
                                std::vector<const MDNode *> &) {
   SMD.print(nullptr, errs());
@@ -77,6 +81,9 @@ MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
   SaveTempLabels = TargetOptions && TargetOptions->MCSaveTempLabels;
   SecureLogFile = TargetOptions ? TargetOptions->AsSecureLogFile : "";
 
+  if (ForceDWARFWindowsPathSeps || TheTriple.isPS())
+    PathStyle = llvm::sys::path::Style::windows;
+
   if (SrcMgr && SrcMgr->getNumBuffers())
     MainFileName = std::string(SrcMgr->getMemoryBuffer(SrcMgr->getMainFileID())
                                    ->getBufferIdentifier());
@@ -970,12 +977,12 @@ void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) {
   if (FileNameBuf.empty() || FileNameBuf == "-")
     FileNameBuf = "<stdin>";
   if (!getMainFileName().empty() && FileNameBuf != getMainFileName()) {
-    llvm::sys::path::remove_filename(FileNameBuf);
-    llvm::sys::path::append(FileNameBuf, getMainFileName());
+    llvm::sys::path::remove_filename(FileNameBuf, PathStyle);
+    llvm::sys::path::append(FileNameBuf, PathStyle, getMainFileName());
   }
   StringRef FileName = FileNameBuf;
   if (FileName.consume_front(getCompilationDir()))
-    if (llvm::sys::path::is_separator(FileName.front()))
+    if (llvm::sys::path::is_separator(FileName.front(), PathStyle))
       FileName = FileName.drop_front();
   assert(!FileName.empty());
   setMCLineTableRootFile(
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index e058358fb8ad4b..3df95d8a0f196c 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -664,9 +664,9 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, StringRef &FileName,
 
   if (Directory.empty()) {
     // Separate the directory part from the basename of the FileName.
-    StringRef tFileName = sys::path::filename(FileName);
+    StringRef tFileName = sys::path::filename(FileName, PathStyle);
     if (!tFileName.empty()) {
-      Directory = sys::path::parent_path(FileName);
+      Directory = sys::path::parent_path(FileName, PathStyle);
       if (!Directory.empty())
         FileName = tFileName;
     }
@@ -939,7 +939,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
 static void EmitGenDwarfInfo(MCStreamer *MCOS,
                              const MCSymbol *AbbrevSectionSymbol,
                              const MCSymbol *LineSectionSymbol,
-                             const MCSymbol *RangesSymbol) {
+                             const MCSymbol *RangesSymbol,
+                             llvm::sys::path::Style Style) {
   MCContext &context = MCOS->getContext();
 
   MCOS->switchSection(context.getObjectFileInfo()->getDwarfInfoSection());
@@ -1037,7 +1038,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
   const SmallVectorImpl<std::string> &MCDwarfDirs = context.getMCDwarfDirs();
   if (MCDwarfDirs.size() > 0) {
     MCOS->emitBytes(MCDwarfDirs[0]);
-    MCOS->emitBytes(sys::path::get_separator());
+    MCOS->emitBytes(sys::path::get_separator(Style));
   }
   const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles = context.getMCDwarfFiles();
   // MCDwarfFiles might be empty if we have an empty source file.
@@ -1225,7 +1226,8 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
   EmitGenDwarfAbbrev(MCOS);
 
   // Output the data for .debug_info section.
-  EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, RangesSymbol);
+  EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, RangesSymbol,
+                   context.getPathStyle());
 }
 
 //
diff --git a/llvm/test/DebugInfo/dir-table-path-separators.ll b/llvm/test/DebugInfo/dir-table-path-separators.ll
new file mode 100644
index 00000000000000..a9c8e039af55f0
--- /dev/null
+++ b/llvm/test/DebugInfo/dir-table-path-separators.ll
@@ -0,0 +1,74 @@
+; RUN: llc %s -o - -filetype=obj -mtriple x86_64-pc-linux-gnu | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=LINUX
+; RUN: llc %s -o - -filetype=obj -mtriple x86_64-pc-linux-gnu -force-dwarf-windows-path-seps=true | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=PS5
+; RUN: llc %s -o - -filetype=obj -mtriple x86_64-sie-ps5 | llvm-dwarfdump - --debug-line | FileCheck %s --check-prefix=PS5
+;
+; UNSUPPORTED: system-windows
+;
+; Check that the DWARF-printing MC backend is willing to consider Windows '\'
+; characters as path separators so that it can build the directory index table.
+; On Linux, the Windows path separators below would be seen as part of the
+; filename, and so wouldn't be combined into a directory entry. Whereas on
+; Windows (or a target masquerading as Windows) they should be combined into a
+; "foo\bar" directory.
+;
+; LINUX: include_directories[  0] = "C:\\foobar"
+; LINUX: file_names[  0]:
+; LINUX:           name: "foo\\bar\\test.cpp"
+; LINUX:      dir_index: 0
+; LINUX: file_names[  1]:
+; LINUX:           name: "foo\\bar\\bar.cpp"
+; LINUX:      dir_index: 0
+; LINUX: file_names[  2]:
+; LINUX:           name: "foo\\bar\\baz.cpp"
+; LINUX:      dir_index: 0
+;
+; PS5:      include_directories[  0] = "C:\\foobar"
+; PS5-NEXT: include_directories[  1] = "foo\\bar"
+; PS5-NEXT: file_names[  0]:
+; PS5-NEXT:           name: "foo\\bar\\test.cpp"
+; PS5-NEXT:      dir_index: 0
+; PS5-NEXT: file_names[  1]:
+; PS5-NEXT:           name: "bar.cpp"
+; PS5-NEXT:      dir_index: 1
+; PS5-NEXT: file_names[  2]:
+; PS5-NEXT:           name: "baz.cpp"
+; PS5-NEXT:      dir_index: 1
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+define dso_local noundef i32 @_Z3foov() local_unnamed_addr !dbg !9 {
+  ret i32 0, !dbg !14
+}
+
+define dso_local noundef i32 @_Z3barv() local_unnamed_addr !dbg !15 {
+  ret i32 0, !dbg !17
+}
+
+define dso_local noundef i32 @main() local_unnamed_addr !dbg !18 {
+  ret i32 0, !dbg !19
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "foo\\bar\\test.cpp", directory: "C:\\foobar")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{!"clang"}
+!9 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !10, file: !10, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!10 = !DIFile(filename: "foo\\bar\\bar.cpp", directory: "C:\\foobar")
+!11 = !DISubroutineType(types: !12)
+!12 = !{!13}
+!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!14 = !DILocation(line: 2, column: 3, scope: !9)
+!15 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !16, file: !16, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!16 = !DIFile(filename: "foo\\bar\\baz.cpp", directory: "C:\\foobar")
+!17 = !DILocation(line: 2, column: 3, scope: !15)
+!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !11, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!19 = !DILocation(line: 4, column: 3, scope: !18)

``````````

</details>


https://github.com/llvm/llvm-project/pull/115888


More information about the llvm-commits mailing list