[lld] [lld][MachO] Prevent doubled N_SO when comp_dir and name absolute (PR #71608)

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 8 15:50:23 PST 2023


https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/71608

>From fac9f6a522af0843d9956f45f96a4661e8b2a3a8 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 7 Nov 2023 15:53:13 -0800
Subject: [PATCH 1/4] [lld][MachO] Prevent doubled N_SO when comp_dir and name
 absolute

When forming MachO STABS, this change detects if the DW_AT_name of the
compile unit is already absolute (as allowed by DWARF), and if so, does
not prepend DW_AT_comp_dir.

Fixes #70995
---
 lld/MachO/InputFiles.cpp        |  5 ++-
 lld/test/MachO/stabs-abs-path.s | 58 +++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/MachO/stabs-abs-path.s

diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 09c6ea9b19b5da2..4cf38169c4bfc9a 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1522,13 +1522,16 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
 }
 
 std::string ObjFile::sourceFile() const {
+  const char *unitName = compileUnit->getUnitDIE().getShortName();
+  if (sys::path::is_absolute(unitName))
+    return unitName;
   SmallString<261> dir(compileUnit->getCompilationDir());
   StringRef sep = sys::path::get_separator();
   // We don't use `path::append` here because we want an empty `dir` to result
   // in an absolute path. `append` would give us a relative path for that case.
   if (!dir.endswith(sep))
     dir += sep;
-  return (dir + compileUnit->getUnitDIE().getShortName()).str();
+  return (dir + unitName).str();
 }
 
 lld::DWARFCache *ObjFile::getDwarf() {
diff --git a/lld/test/MachO/stabs-abs-path.s b/lld/test/MachO/stabs-abs-path.s
new file mode 100644
index 000000000000000..565ed1ae7715598
--- /dev/null
+++ b/lld/test/MachO/stabs-abs-path.s
@@ -0,0 +1,58 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+
+# RUN: %lld -lSystem %t.o -o %t
+# RUN: dsymutil -s %t | FileCheck %s
+
+# CHECK:      (N_SO         ) 00      0000   0000000000000000   '/foo.cpp'
+
+.text
+.globl  _main
+_main:
+Lfunc_begin0:
+  retq
+Lfunc_end0:
+
+.section  __DWARF,__debug_str,regular,debug
+  .asciz  "/foo.cpp"             ## string offset=0
+  .asciz  "/tmp"                 ## string offset=9
+.section  __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+  .byte  1                       ## Abbreviation Code
+  .byte  17                      ## DW_TAG_compile_unit
+  .byte  1                       ## DW_CHILDREN_yes
+  .byte  3                       ## DW_AT_name
+  .byte  14                      ## DW_FORM_strp
+  .byte  27                      ## DW_AT_comp_dir
+  .byte  14                      ## DW_FORM_strp
+  .byte  17                      ## DW_AT_low_pc
+  .byte  1                       ## DW_FORM_addr
+  .byte  18                      ## DW_AT_high_pc
+  .byte  6                       ## DW_FORM_data4
+  .byte  0                       ## EOM(1)
+  .byte  0                       ## EOM(2)
+  .byte  0                       ## EOM(3)
+.section  __DWARF,__debug_info,regular,debug
+.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
+  .long  Lset0
+Ldebug_info_start0:
+  .short  4                       ## DWARF version number
+.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
+  .long  Lset1
+  .byte  8                       ## Address Size (in bytes)
+  .byte  1                       ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
+  .long  0                       ## DW_AT_name
+  .long  9                       ## DW_AT_comp_dir
+  .quad  Lfunc_begin0            ## DW_AT_low_pc
+.set Lset3, Lfunc_end0-Lfunc_begin0     ## DW_AT_high_pc
+  .long  Lset3
+  .byte  0                       ## End Of Children Mark
+Ldebug_info_end0:
+
+.section  __DWARF,__debug_aranges,regular,debug
+ltmp1:
+  .byte 0
+
+.subsections_via_symbols
+
+

>From ba44fab93b22869703a3aa6d1ace5cb7e546f8de Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 8 Nov 2023 14:01:03 -0800
Subject: [PATCH 2/4] Mirror the path logic used in LLVM Object library

---
 lld/MachO/InputFiles.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 4cf38169c4bfc9a..7c020d6e06051dc 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1523,7 +1523,8 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
 
 std::string ObjFile::sourceFile() const {
   const char *unitName = compileUnit->getUnitDIE().getShortName();
-  if (sys::path::is_absolute(unitName))
+  if (sys::path::is_absolute(unitName, llvm::sys::path::Style::posix) ||
+      sys::path::is_absolute(unitName, llvm::sys::path::Style::windows))
     return unitName;
   SmallString<261> dir(compileUnit->getCompilationDir());
   StringRef sep = sys::path::get_separator();

>From 2733d54fc46567c6caae05be5ecf81ec61b9c847 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 8 Nov 2023 15:39:07 -0800
Subject: [PATCH 3/4] Add comment from DWARF library

---
 lld/MachO/InputFiles.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7c020d6e06051dc..31ed24149e78ddf 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1523,6 +1523,11 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
 
 std::string ObjFile::sourceFile() const {
   const char *unitName = compileUnit->getUnitDIE().getShortName();
+  // DWARF allows DW_AT_name to be absolute, in which case nothing should be
+  // prepended. As for the styles, debug info can contain paths from any OS, not
+  // necessarily the OS we're currently running on. Moreover, different
+  // compilation units can be compiled on different operating systems and linked
+  // together later.
   if (sys::path::is_absolute(unitName, llvm::sys::path::Style::posix) ||
       sys::path::is_absolute(unitName, llvm::sys::path::Style::windows))
     return unitName;

>From 8fed0e05ec18a385a3b99d5debf7cf16cd230a24 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 8 Nov 2023 15:47:34 -0800
Subject: [PATCH 4/4] Move test to stabs.s

---
 lld/test/MachO/stabs-abs-path.s | 58 ---------------------------------
 lld/test/MachO/stabs.s          | 55 +++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 58 deletions(-)
 delete mode 100644 lld/test/MachO/stabs-abs-path.s

diff --git a/lld/test/MachO/stabs-abs-path.s b/lld/test/MachO/stabs-abs-path.s
deleted file mode 100644
index 565ed1ae7715598..000000000000000
--- a/lld/test/MachO/stabs-abs-path.s
+++ /dev/null
@@ -1,58 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
-
-# RUN: %lld -lSystem %t.o -o %t
-# RUN: dsymutil -s %t | FileCheck %s
-
-# CHECK:      (N_SO         ) 00      0000   0000000000000000   '/foo.cpp'
-
-.text
-.globl  _main
-_main:
-Lfunc_begin0:
-  retq
-Lfunc_end0:
-
-.section  __DWARF,__debug_str,regular,debug
-  .asciz  "/foo.cpp"             ## string offset=0
-  .asciz  "/tmp"                 ## string offset=9
-.section  __DWARF,__debug_abbrev,regular,debug
-Lsection_abbrev:
-  .byte  1                       ## Abbreviation Code
-  .byte  17                      ## DW_TAG_compile_unit
-  .byte  1                       ## DW_CHILDREN_yes
-  .byte  3                       ## DW_AT_name
-  .byte  14                      ## DW_FORM_strp
-  .byte  27                      ## DW_AT_comp_dir
-  .byte  14                      ## DW_FORM_strp
-  .byte  17                      ## DW_AT_low_pc
-  .byte  1                       ## DW_FORM_addr
-  .byte  18                      ## DW_AT_high_pc
-  .byte  6                       ## DW_FORM_data4
-  .byte  0                       ## EOM(1)
-  .byte  0                       ## EOM(2)
-  .byte  0                       ## EOM(3)
-.section  __DWARF,__debug_info,regular,debug
-.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
-  .long  Lset0
-Ldebug_info_start0:
-  .short  4                       ## DWARF version number
-.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
-  .long  Lset1
-  .byte  8                       ## Address Size (in bytes)
-  .byte  1                       ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
-  .long  0                       ## DW_AT_name
-  .long  9                       ## DW_AT_comp_dir
-  .quad  Lfunc_begin0            ## DW_AT_low_pc
-.set Lset3, Lfunc_end0-Lfunc_begin0     ## DW_AT_high_pc
-  .long  Lset3
-  .byte  0                       ## End Of Children Mark
-Ldebug_info_end0:
-
-.section  __DWARF,__debug_aranges,regular,debug
-ltmp1:
-  .byte 0
-
-.subsections_via_symbols
-
-
diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
index 145e7960b6657a7..80658b2f9e52916 100644
--- a/lld/test/MachO/stabs.s
+++ b/lld/test/MachO/stabs.s
@@ -145,6 +145,11 @@
 # PIE-NEXT:  segment  section            address     type
 # PIE-EMPTY:
 
+# RUN: llvm-mc -filetype obj -triple=x86_64-apple-darwin %t/abs-path.s -o %t/abs-path.o
+# RUN: %lld %t/abs-path.o -o %t/test
+# RUN: (llvm-objdump --section-headers %t/test; dsymutil -s %t/test) | FileCheck %s --check-prefix=ABS-PATH
+# ABS-PATH:      (N_SO         ) 00      0000   0000000000000000   '/foo.cpp'
+
 #--- test.s
 
 ## Make sure we don't create STABS entries for absolute symbols.
@@ -287,3 +292,53 @@ ltmp1:
 .globl _no_debug
 _no_debug:
   ret
+
+#--- abs-path.s
+.text
+.globl  _main
+_main:
+Lfunc_begin0:
+  retq
+Lfunc_end0:
+
+.section  __DWARF,__debug_str,regular,debug
+  .asciz  "/foo.cpp"             ## string offset=0
+  .asciz  "/tmp"                 ## string offset=9
+.section  __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+  .byte  1                       ## Abbreviation Code
+  .byte  17                      ## DW_TAG_compile_unit
+  .byte  1                       ## DW_CHILDREN_yes
+  .byte  3                       ## DW_AT_name
+  .byte  14                      ## DW_FORM_strp
+  .byte  27                      ## DW_AT_comp_dir
+  .byte  14                      ## DW_FORM_strp
+  .byte  17                      ## DW_AT_low_pc
+  .byte  1                       ## DW_FORM_addr
+  .byte  18                      ## DW_AT_high_pc
+  .byte  6                       ## DW_FORM_data4
+  .byte  0                       ## EOM(1)
+  .byte  0                       ## EOM(2)
+  .byte  0                       ## EOM(3)
+.section  __DWARF,__debug_info,regular,debug
+.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
+  .long  Lset0
+Ldebug_info_start0:
+  .short  4                       ## DWARF version number
+.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
+  .long  Lset1
+  .byte  8                       ## Address Size (in bytes)
+  .byte  1                       ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
+  .long  0                       ## DW_AT_name
+  .long  9                       ## DW_AT_comp_dir
+  .quad  Lfunc_begin0            ## DW_AT_low_pc
+.set Lset3, Lfunc_end0-Lfunc_begin0     ## DW_AT_high_pc
+  .long  Lset3
+  .byte  0                       ## End Of Children Mark
+Ldebug_info_end0:
+
+.section  __DWARF,__debug_aranges,regular,debug
+ltmp1:
+  .byte 0
+
+.subsections_via_symbols



More information about the llvm-commits mailing list