[llvm] [llvm][DebugInfo] Support versioned source language names in DwarfUnit (PR #162625)

Michael Buch via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 10 05:46:51 PDT 2025


https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/162625

>From 1ddc997373468ccd72ca31d916c6917984599e01 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 7 Oct 2025 10:03:45 +0100
Subject: [PATCH 1/4] [llvm][DebugInfo] Emit DW_AT_language_name

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    |  8 ++++++--
 .../compileunit-source-language-name.ll       | 20 +++++++++++++++++++
 .../Generic/compileunit-source-language.ll    | 20 +++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
 create mode 100644 llvm/test/DebugInfo/Generic/compileunit-source-language.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d751a7f9f01ef..433877f3a8b98 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1039,8 +1039,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
   } else
     NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
 
-  NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
-                DIUnit->getSourceLanguage().getUnversionedName());
+  if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName())
+    NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2,
+                  Lang.getName());
+  else
+    NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                  Lang.getName());
 
   NewCU.addString(Die, dwarf::DW_AT_name, FN);
   StringRef SysRoot = DIUnit->getSysRoot();
diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
new file mode 100644
index 0000000000000..94e32ab8c65b2
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
@@ -0,0 +1,20 @@
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not "DW_AT_language"
+
+; CHECK: DW_AT_language_name (DW_LNAME_ObjC_plus_plus)
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+@x = global i32 0, align 4, !dbg !0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!6, !7}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !3, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!3 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!4 = !{!0}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{i32 7, !"Dwarf Version", i32 5}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language.ll
new file mode 100644
index 0000000000000..0d0da4fa11d64
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/compileunit-source-language.ll
@@ -0,0 +1,20 @@
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not "DW_AT_language_name"
+
+; CHECK: DW_AT_language (DW_LANG_C)
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+@x = global i32 0, align 4, !dbg !0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!6, !7}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!3 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!4 = !{!0}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{i32 7, !"Dwarf Version", i32 5}
+!7 = !{i32 2, !"Debug Info Version", i32 3}

>From 2f5a3a71cf64070b5c11002fdf2fbd73c1029b48 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Fri, 10 Oct 2025 11:24:28 +0100
Subject: [PATCH 2/4] fixup! fix CodeView SourceLanguage mappings

---
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 42 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 12d749ce56f06..d4a30cd4cb1dd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -569,7 +569,40 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
   OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
 }
 
-static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+static SourceLanguage
+MapDWARFLanguageToCVLang(dwarf::SourceLanguageName DWLName) {
+  switch (DWLName) {
+  case dwarf::DW_LNAME_C:
+    return SourceLanguage::C;
+  case dwarf::DW_LNAME_C_plus_plus:
+    return SourceLanguage::Cpp;
+  case dwarf::DW_LNAME_Fortran:
+    return SourceLanguage::Fortran;
+  case dwarf::DW_LNAME_Pascal:
+    return SourceLanguage::Pascal;
+  case dwarf::DW_LNAME_Cobol:
+    return SourceLanguage::Cobol;
+  case dwarf::DW_LNAME_Java:
+    return SourceLanguage::Java;
+  case dwarf::DW_LNAME_D:
+    return SourceLanguage::D;
+  case dwarf::DW_LNAME_Swift:
+    return SourceLanguage::Swift;
+  case dwarf::DW_LNAME_Rust:
+    return SourceLanguage::Rust;
+  case dwarf::DW_LNAME_ObjC:
+    return SourceLanguage::ObjC;
+  case dwarf::DW_LNAME_ObjC_plus_plus:
+    return SourceLanguage::ObjCpp;
+  default:
+    // There's no CodeView representation for this language, and CV doesn't
+    // have an "unknown" option for the language field, so we'll use MASM,
+    // as it's very low level.
+    return SourceLanguage::Masm;
+  }
+}
+
+static SourceLanguage MapDWARFLanguageToCVLang(dwarf::SourceLanguage DWLang) {
   switch (DWLang) {
   case dwarf::DW_LANG_C:
   case dwarf::DW_LANG_C89:
@@ -633,8 +666,13 @@ void CodeViewDebug::beginModule(Module *M) {
     Node = *CUs->operands().begin();
   }
   const auto *CU = cast<DICompileUnit>(Node);
+  DISourceLanguageName Lang = CU->getSourceLanguage();
   CurrentSourceLanguage =
-      MapDWLangToCVLang(CU->getSourceLanguage().getUnversionedName());
+      Lang.hasVersionedName()
+          ? MapDWARFLanguageToCVLang(
+                static_cast<dwarf::SourceLanguageName>(Lang.getName()))
+          : MapDWARFLanguageToCVLang(
+                static_cast<dwarf::SourceLanguage>(Lang.getName()));
   if (!M->getCodeViewFlag() ||
       CU->getEmissionKind() == DICompileUnit::NoDebug) {
     Asm = nullptr;

>From 34d4fa23f9ea80e566ac9b3f86e32f1f17ac0e81 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Fri, 10 Oct 2025 13:35:22 +0100
Subject: [PATCH 3/4] fixup! simplify MapDWARFLanguageToCVLang

---
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 45 +++----------------
 1 file changed, 5 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index d4a30cd4cb1dd..e57ed24a45065 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -603,46 +603,11 @@ MapDWARFLanguageToCVLang(dwarf::SourceLanguageName DWLName) {
 }
 
 static SourceLanguage MapDWARFLanguageToCVLang(dwarf::SourceLanguage DWLang) {
-  switch (DWLang) {
-  case dwarf::DW_LANG_C:
-  case dwarf::DW_LANG_C89:
-  case dwarf::DW_LANG_C99:
-  case dwarf::DW_LANG_C11:
-    return SourceLanguage::C;
-  case dwarf::DW_LANG_C_plus_plus:
-  case dwarf::DW_LANG_C_plus_plus_03:
-  case dwarf::DW_LANG_C_plus_plus_11:
-  case dwarf::DW_LANG_C_plus_plus_14:
-    return SourceLanguage::Cpp;
-  case dwarf::DW_LANG_Fortran77:
-  case dwarf::DW_LANG_Fortran90:
-  case dwarf::DW_LANG_Fortran95:
-  case dwarf::DW_LANG_Fortran03:
-  case dwarf::DW_LANG_Fortran08:
-    return SourceLanguage::Fortran;
-  case dwarf::DW_LANG_Pascal83:
-    return SourceLanguage::Pascal;
-  case dwarf::DW_LANG_Cobol74:
-  case dwarf::DW_LANG_Cobol85:
-    return SourceLanguage::Cobol;
-  case dwarf::DW_LANG_Java:
-    return SourceLanguage::Java;
-  case dwarf::DW_LANG_D:
-    return SourceLanguage::D;
-  case dwarf::DW_LANG_Swift:
-    return SourceLanguage::Swift;
-  case dwarf::DW_LANG_Rust:
-    return SourceLanguage::Rust;
-  case dwarf::DW_LANG_ObjC:
-    return SourceLanguage::ObjC;
-  case dwarf::DW_LANG_ObjC_plus_plus:
-    return SourceLanguage::ObjCpp;
-  default:
-    // There's no CodeView representation for this language, and CV doesn't
-    // have an "unknown" option for the language field, so we'll use MASM,
-    // as it's very low level.
-    return SourceLanguage::Masm;
-  }
+  auto MaybeLName = dwarf::toDW_LNAME(DWLang);
+  if (!MaybeLName)
+    return MapDWARFLanguageToCVLang(static_cast<dwarf::SourceLanguageName>(0));
+
+  return MapDWARFLanguageToCVLang(MaybeLName->first);
 }
 
 void CodeViewDebug::beginModule(Module *M) {

>From aaafe4a6d660d68ee21fef5621ec86ede3941379 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Thu, 9 Oct 2025 11:00:54 +0100
Subject: [PATCH 4/4] [llvm][DebugInfo] Support versioned source language names
 in DwarfUnit

---
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp             | 11 +++++++++--
 .../Generic/compileunit-source-language-name.ll       |  9 +++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index aa078f3f81d49..b58536f89ccb5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -705,8 +705,15 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
 }
 
 llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const {
-  return static_cast<llvm::dwarf::SourceLanguage>(
-      getLanguage().getUnversionedName());
+  const auto &Lang = getLanguage();
+
+  if (!Lang.hasVersionedName())
+    return static_cast<llvm::dwarf::SourceLanguage>(Lang.getName());
+
+  return llvm::dwarf::toDW_LANG(
+             static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+             Lang.getVersion())
+      .value_or(llvm::dwarf::DW_LANG_hi_user);
 }
 
 std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
index 94e32ab8c65b2..edb4e669d4631 100644
--- a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
+++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
@@ -7,6 +7,11 @@ target triple = "arm64-apple-macosx"
 
 @x = global i32 0, align 4, !dbg !0
 
+; Function Attrs: mustprogress noinline nounwind optnone ssp uwtable(sync)
+define void @_Z4funcv() !dbg !8 {
+  ret void, !dbg !11
+}
+
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!6, !7}
 
@@ -18,3 +23,7 @@ target triple = "arm64-apple-macosx"
 !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !6 = !{i32 7, !"Dwarf Version", i32 5}
 !7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", scope: !3, file: !3, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null}
+!11 = !DILocation(line: 2, column: 14, scope: !8)



More information about the llvm-commits mailing list