[llvm] [llvm][DebugInfo] Support versioned source language names in DwarfUnit (PR #162625)

Michael Buch via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 03:02:35 PDT 2025


https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/162625

Depends on:
* https://github.com/llvm/llvm-project/pull/162621

Now we can start emitting `DW_AT_language_name`, make sure `DwarfUnit::getSourceLanguage` is equipped to handle this. Otherwise the new test-case would assert.

>From 14f0aee98749599fdb0de0f00697669acaf955b7 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 7 Oct 2025 09:51:52 +0100
Subject: [PATCH 1/4] [llvm][DebugInfo] Support for versioned
 DISourceLanguageName

---
 llvm/include/llvm/AsmParser/LLToken.h         | 43 ++++++++--------
 llvm/lib/AsmParser/LLLexer.cpp                |  1 +
 llvm/lib/AsmParser/LLParser.cpp               | 49 ++++++++++++++++---
 llvm/lib/Bitcode/Reader/MetadataLoader.cpp    | 16 ++++--
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  8 ++-
 llvm/lib/IR/AsmWriter.cpp                     | 14 ++++--
 ...compileunit-conflicting-language-fields.ll |  4 ++
 .../dicompileunit-invalid-language.ll         | 22 +++++++++
 .../invalid-dicompileunit-missing-language.ll |  2 +-
 .../Bitcode/dwarf-source-language-name.ll     | 15 ++++++
 10 files changed, 136 insertions(+), 38 deletions(-)
 create mode 100644 llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
 create mode 100644 llvm/test/Assembler/dicompileunit-invalid-language.ll
 create mode 100644 llvm/test/Bitcode/dwarf-source-language-name.ll

diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index d976d40e5e956..6de99fe182ad9 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -488,27 +488,28 @@ enum Kind {
   SummaryID,  // ^42
 
   // String valued tokens (StrVal).
-  LabelStr,         // foo:
-  GlobalVar,        // @foo @"foo"
-  ComdatVar,        // $foo
-  LocalVar,         // %foo %"foo"
-  MetadataVar,      // !foo
-  StringConstant,   // "foo"
-  DwarfTag,         // DW_TAG_foo
-  DwarfAttEncoding, // DW_ATE_foo
-  DwarfVirtuality,  // DW_VIRTUALITY_foo
-  DwarfLang,        // DW_LANG_foo
-  DwarfCC,          // DW_CC_foo
-  EmissionKind,     // lineTablesOnly
-  NameTableKind,    // GNU
-  FixedPointKind,   // Fixed point
-  DwarfOp,          // DW_OP_foo
-  DIFlag,           // DIFlagFoo
-  DISPFlag,         // DISPFlagFoo
-  DwarfMacinfo,     // DW_MACINFO_foo
-  ChecksumKind,     // CSK_foo
-  DbgRecordType,    // dbg_foo
-  DwarfEnumKind,    // DW_APPLE_ENUM_KIND_foo
+  LabelStr,            // foo:
+  GlobalVar,           // @foo @"foo"
+  ComdatVar,           // $foo
+  LocalVar,            // %foo %"foo"
+  MetadataVar,         // !foo
+  StringConstant,      // "foo"
+  DwarfTag,            // DW_TAG_foo
+  DwarfAttEncoding,    // DW_ATE_foo
+  DwarfVirtuality,     // DW_VIRTUALITY_foo
+  DwarfLang,           // DW_LANG_foo
+  DwarfSourceLangName, // DW_LNAME_foo
+  DwarfCC,             // DW_CC_foo
+  EmissionKind,        // lineTablesOnly
+  NameTableKind,       // GNU
+  FixedPointKind,      // Fixed point
+  DwarfOp,             // DW_OP_foo
+  DIFlag,              // DIFlagFoo
+  DISPFlag,            // DISPFlagFoo
+  DwarfMacinfo,        // DW_MACINFO_foo
+  ChecksumKind,        // CSK_foo
+  DbgRecordType,       // dbg_foo
+  DwarfEnumKind,       // DW_APPLE_ENUM_KIND_foo
 
   // Type valued tokens (TyVal).
   Type,
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index f6937d38eb38c..50d1d4730007a 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -982,6 +982,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   DWKEYWORD(ATE, DwarfAttEncoding);
   DWKEYWORD(VIRTUALITY, DwarfVirtuality);
   DWKEYWORD(LANG, DwarfLang);
+  DWKEYWORD(LNAME, DwarfSourceLangName);
   DWKEYWORD(CC, DwarfCC);
   DWKEYWORD(OP, DwarfOp);
   DWKEYWORD(MACINFO, DwarfMacinfo);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 55899660fa84a..380b19296a3c4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4740,6 +4740,10 @@ struct DwarfLangField : public MDUnsignedField {
   DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
 };
 
+struct DwarfSourceLangNameField : public MDUnsignedField {
+  DwarfSourceLangNameField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+
 struct DwarfCCField : public MDUnsignedField {
   DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {}
 };
@@ -4997,6 +5001,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
   return false;
 }
 
+template <>
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
+                            DwarfSourceLangNameField &Result) {
+  if (Lex.getKind() == lltok::APSInt)
+    return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+  if (Lex.getKind() != lltok::DwarfSourceLangName)
+    return tokError("expected DWARF source language name");
+
+  unsigned Lang = dwarf::getSourceLanguageName(Lex.getStrVal());
+  if (!Lang)
+    return tokError("invalid DWARF source language name" + Twine(" '") +
+                    Lex.getStrVal() + "'");
+  assert(Lang <= Result.Max && "Expected valid DWARF source language name");
+  Result.assign(Lang);
+  Lex.Lex();
+  return false;
+}
+
 template <>
 bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
   if (Lex.getKind() == lltok::APSInt)
@@ -5836,9 +5859,12 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   if (!IsDistinct)
     return tokError("missing 'distinct', required for !DICompileUnit");
 
+  LocTy Loc = Lex.getLoc();
+
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
-  REQUIRED(language, DwarfLangField, );                                        \
   REQUIRED(file, MDField, (/* AllowNull */ false));                            \
+  OPTIONAL(language, DwarfLangField, );                                        \
+  OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, );                    \
   OPTIONAL(producer, MDStringField, );                                         \
   OPTIONAL(isOptimized, MDBoolField, );                                        \
   OPTIONAL(flags, MDStringField, );                                            \
@@ -5860,12 +5886,23 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
+  if (!language.Seen && !sourceLanguageName.Seen)
+    return error(Loc, "missing one of 'language' or 'sourceLanguageName', "
+                      "required for !DICompileUnit");
+
+  if (language.Seen && sourceLanguageName.Seen)
+    return error(Loc, "can only specify one of 'language' and "
+                      "'sourceLanguageName' on !DICompileUnit");
+
   Result = DICompileUnit::getDistinct(
-      Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
-      isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
-      emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
-      macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
-      nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
+      Context,
+      language.Seen ? DISourceLanguageName(language.Val)
+                    : DISourceLanguageName(sourceLanguageName.Val, 0),
+      file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
+      splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
+      globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
+      debugInfoForProfiling.Val, nameTableKind.Val, rangesBaseAddress.Val,
+      sysroot.Val, sdk.Val);
   return false;
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index a4d1b8372dfac..cdcf7a80ffac7 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1867,12 +1867,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     // distinct.  It's always distinct.
     IsDistinct = true;
 
+    const auto LangVersionMask = (uint64_t(1) << 63);
+    const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+
     auto *CU = DICompileUnit::getDistinct(
-        Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
-        getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
-        getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
-        getMDOrNull(Record[10]), getMDOrNull(Record[12]),
-        getMDOrNull(Record[13]),
+        Context,
+        HasVersionedLanguage
+            ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+            : DISourceLanguageName(Record[1]),
+        getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
+        getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
+        getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+        getMDOrNull(Record[12]), getMDOrNull(Record[13]),
         Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
         Record.size() <= 14 ? 0 : Record[14],
         Record.size() <= 16 ? true : Record[16],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7ed140d392fca..0ca55a26f8013 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2108,7 +2108,13 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
   assert(N->isDistinct() && "Expected distinct compile units");
   Record.push_back(/* IsDistinct */ true);
 
-  Record.push_back(N->getSourceLanguage().getUnversionedName());
+  auto Lang = N->getSourceLanguage();
+  Record.push_back(Lang.getName());
+  // Set bit so the MetadataLoader can distniguish between versioned and
+  // unversioned names.
+  if (Lang.hasVersionedName())
+    Record.back() ^= (uint64_t(1) << 63);
+
   Record.push_back(VE.getMetadataOrNullID(N->getFile()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
   Record.push_back(N->isOptimized());
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd3902d..0bc877d1c4471 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2370,10 +2370,16 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
   Out << "!DICompileUnit(";
   MDFieldPrinter Printer(Out, WriterCtx);
 
-  Printer.printDwarfEnum("language",
-                         N->getSourceLanguage().getUnversionedName(),
-                         dwarf::LanguageString,
-                         /* ShouldSkipZero */ false);
+  auto Lang = N->getSourceLanguage();
+  if (Lang.hasVersionedName())
+    Printer.printDwarfEnum(
+        "sourceLanguageName",
+        static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+        dwarf::SourceLanguageNameString,
+        /* ShouldSkipZero */ false);
+  else
+    Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
+                           /* ShouldSkipZero */ false);
 
   Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
   Printer.printString("producer", N->getProducer());
diff --git a/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
new file mode 100644
index 0000000000000..3aad27ba8a87a
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:15: error: can only specify one of 'language' and 'sourceLanguageName' on !DICompileUnit
+!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageName: DW_LNAME_C, file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Assembler/dicompileunit-invalid-language.ll b/llvm/test/Assembler/dicompileunit-invalid-language.ll
new file mode 100644
index 0000000000000..da93c4f20e333
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-invalid-language.ll
@@ -0,0 +1,22 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as < %t/invalid_dw_lang.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG
+; RUN: not llvm-as < %t/invalid_dw_lang_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG_2
+; RUN: not llvm-as < %t/invalid_dw_lname.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME
+; RUN: not llvm-as < %t/invalid_dw_lname_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME_2
+
+; INVALID_DW_LANG:    invalid DWARF language 'DW_LANG_blah'
+; INVALID_DW_LANG_2:  expected DWARF language
+; INVALID_DW_LNAME:   invalid DWARF source language name 'DW_LNAME_blah'
+; INVALID_DW_LNAME_2: expected DWARF source language name
+
+;--- invalid_dw_lang.ll
+!0 = distinct !DICompileUnit(language: DW_LANG_blah)
+
+;--- invalid_dw_lang_2.ll
+!0 = distinct !DICompileUnit(language: DW_LNAME_C)
+
+;--- invalid_dw_lname.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_blah)
+
+;--- invalid_dw_lname_2.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LANG_C)
diff --git a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
index 8e4cb0261dbbf..ebc86e3410d66 100644
--- a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
+++ b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
@@ -1,4 +1,4 @@
 ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
 
-; CHECK: <stdin>:[[@LINE+1]]:74: error: missing required field 'language'
+; CHECK: <stdin>:[[@LINE+1]]:15: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit
 !0 = distinct !DICompileUnit(file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Bitcode/dwarf-source-language-name.ll b/llvm/test/Bitcode/dwarf-source-language-name.ll
new file mode 100644
index 0000000000000..e9893638f61b5
--- /dev/null
+++ b/llvm/test/Bitcode/dwarf-source-language-name.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: sourceLanguageName: DW_LNAME_ObjC_plus_plus
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!1 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}

>From 5e2ac17d162f536a7cbe2abbce8ff8bcd9b13ec9 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 8 Oct 2025 10:44:54 +0100
Subject: [PATCH 2/4] [llvm-dwarfdump] Support for DW_AT_language_name and
 DW_AT_language_version

Makes sure we dump the attributes in a user-friendly way.

(cherry picked from commit e03b759ee90c4c7326690524557e1dd06ba96a7c)
---
 llvm/lib/BinaryFormat/Dwarf.cpp               |  2 ++
 llvm/lib/DebugInfo/DWARF/DWARFDie.cpp         |  3 +-
 .../llvm-dwarfdump/X86/DW_AT_language_name.s  | 35 +++++++++++++++++++
 .../X86/DW_AT_language_version.s              | 35 +++++++++++++++++++
 4 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_name.s
 create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_version.s

diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 969047a8e5b5c..55fa2df632bfa 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -893,6 +893,8 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
     return DefaultedMemberString(Val);
   case DW_AT_APPLE_enum_kind:
     return EnumKindString(Val);
+  case DW_AT_language_name:
+    return SourceLanguageNameString(static_cast<SourceLanguageName>(Val));
   }
 
   return StringRef();
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index edc69a36cdff2..212a0c039298b 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -149,7 +149,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
   if (!Name.empty())
     WithColor(OS, Color) << Name;
   else if (Attr == DW_AT_decl_line || Attr == DW_AT_decl_column ||
-           Attr == DW_AT_call_line || Attr == DW_AT_call_column) {
+           Attr == DW_AT_call_line || Attr == DW_AT_call_column ||
+           Attr == DW_AT_language_version) {
     if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
       OS << *Val;
     else
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_name.s b/llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_name.s
new file mode 100644
index 0000000000000..af581753be56a
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_name.s
@@ -0,0 +1,35 @@
+# Demonstrate dumping DW_AT_language_name.
+# RUN: llvm-mc -triple=x86_64--linux -filetype=obj < %s | \
+# RUN:     llvm-dwarfdump -v - | FileCheck %s
+
+# CHECK: .debug_abbrev contents:
+# CHECK: DW_AT_language_name DW_FORM_data2
+# CHECK: DW_AT_language_name DW_FORM_data2
+# CHECK: .debug_info contents:
+# CHECK: DW_AT_language_name [DW_FORM_data2] (DW_LNAME_C)
+# CHECK: DW_AT_language_name [DW_FORM_data2] (0x0000)
+
+        .section        .debug_abbrev,"", at progbits
+        .byte   1                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_no 
+        .ascii  "\220\001"              # DW_AT_language_name
+        .byte   5                       # DW_FORM_data2
+        .ascii  "\220\001"              # DW_AT_language_name
+        .byte   5                       # DW_FORM_data2
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+
+        .section        .debug_info,"", at progbits
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                       # DWARF version number
+        .byte   1                       # Unit type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   1                       # Abbrev [1] DW_TAG_compile_unit
+        .short  3                       # DW_AT_language_name
+        .short  0                       # DW_AT_language_name
+        .byte   0 
+.Ldebug_info_end0:
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_version.s b/llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_version.s
new file mode 100644
index 0000000000000..f1be8fdf6cd3e
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/DW_AT_language_version.s
@@ -0,0 +1,35 @@
+# Demonstrate dumping DW_AT_language_version.
+# RUN: llvm-mc -triple=x86_64--linux -filetype=obj < %s | \
+# RUN:     llvm-dwarfdump -v - | FileCheck %s
+
+# CHECK: .debug_abbrev contents:
+# CHECK: DW_AT_language_version DW_FORM_data4
+# CHECK: DW_AT_language_version DW_FORM_data2
+# CHECK: .debug_info contents:
+# CHECK: DW_AT_language_version [DW_FORM_data4] (201402)
+# CHECK: DW_AT_language_version [DW_FORM_data2] (0)
+
+        .section        .debug_abbrev,"", at progbits
+        .byte   1                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_no 
+        .ascii  "\221\001"              # DW_AT_language_version
+        .byte   6                       # DW_FORM_data4
+        .ascii  "\221\001"              # DW_AT_language_version
+        .byte   5                       # DW_FORM_data2
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+
+        .section        .debug_info,"", at progbits
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                       # DWARF version number
+        .byte   1                       # Unit type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   1                       # Abbrev [1] DW_TAG_compile_unit
+        .long   201402                  # DW_AT_language_version
+        .short  0                       # DW_AT_language_version
+        .byte   0 
+.Ldebug_info_end0:

>From b9e4b548ef63f5760f30cd2a7bf6285791729573 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 7 Oct 2025 10:03:45 +0100
Subject: [PATCH 3/4] [llvm][DebugInfo] Emit DW_AT_lanugage_name

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    |  8 ++++++--
 .../compileunit-source-language-name.ll       | 20 +++++++++++++++++++
 .../Generic/compileunit-source-language.ll    | 20 +++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
 create mode 100644 llvm/test/DebugInfo/Generic/compileunit-source-language.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d751a7f9f01ef..433877f3a8b98 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1039,8 +1039,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
   } else
     NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
 
-  NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
-                DIUnit->getSourceLanguage().getUnversionedName());
+  if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName())
+    NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2,
+                  Lang.getName());
+  else
+    NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                  Lang.getName());
 
   NewCU.addString(Die, dwarf::DW_AT_name, FN);
   StringRef SysRoot = DIUnit->getSysRoot();
diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
new file mode 100644
index 0000000000000..94e32ab8c65b2
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
@@ -0,0 +1,20 @@
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not "DW_AT_language"
+
+; CHECK: DW_AT_language_name (DW_LNAME_ObjC_plus_plus)
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+ at x = global i32 0, align 4, !dbg !0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!6, !7}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !3, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!3 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!4 = !{!0}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{i32 7, !"Dwarf Version", i32 5}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language.ll
new file mode 100644
index 0000000000000..0d0da4fa11d64
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/compileunit-source-language.ll
@@ -0,0 +1,20 @@
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-info - | FileCheck %s --implicit-check-not "DW_AT_language_name"
+
+; CHECK: DW_AT_language (DW_LANG_C)
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+ at x = global i32 0, align 4, !dbg !0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!6, !7}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!3 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!4 = !{!0}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{i32 7, !"Dwarf Version", i32 5}
+!7 = !{i32 2, !"Debug Info Version", i32 3}

>From 0c83bb50f13614689879c3dfeca58ac6ca19a524 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Thu, 9 Oct 2025 11:00:54 +0100
Subject: [PATCH 4/4] [llvm][DebugInfo] Support versioned source language names
 in DwarfUnit

---
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp             | 11 +++++++++--
 .../Generic/compileunit-source-language-name.ll       |  9 +++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index aa078f3f81d49..b58536f89ccb5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -705,8 +705,15 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
 }
 
 llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const {
-  return static_cast<llvm::dwarf::SourceLanguage>(
-      getLanguage().getUnversionedName());
+  const auto &Lang = getLanguage();
+
+  if (!Lang.hasVersionedName())
+    return static_cast<llvm::dwarf::SourceLanguage>(Lang.getName());
+
+  return llvm::dwarf::toDW_LANG(
+             static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+             Lang.getVersion())
+      .value_or(llvm::dwarf::DW_LANG_hi_user);
 }
 
 std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
index 94e32ab8c65b2..edb4e669d4631 100644
--- a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
+++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll
@@ -7,6 +7,11 @@ target triple = "arm64-apple-macosx"
 
 @x = global i32 0, align 4, !dbg !0
 
+; Function Attrs: mustprogress noinline nounwind optnone ssp uwtable(sync)
+define void @_Z4funcv() !dbg !8 {
+  ret void, !dbg !11
+}
+
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!6, !7}
 
@@ -18,3 +23,7 @@ target triple = "arm64-apple-macosx"
 !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !6 = !{i32 7, !"Dwarf Version", i32 5}
 !7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", scope: !3, file: !3, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null}
+!11 = !DILocation(line: 2, column: 14, scope: !8)



More information about the llvm-commits mailing list