[llvm] [llvm][DebugInfo] Add 'sourceLanguageName' field support to DICompileUnit (PR #162445)

Michael Buch via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 8 10:33:40 PDT 2025


https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/162445

>From 84cf912dd019edb79e069383f159fb8121312473 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 7 Oct 2025 09:51:52 +0100
Subject: [PATCH] [llvm][DebugInfo] Support for versioned DISourceLanguageName

---
 llvm/include/llvm/AsmParser/LLToken.h         | 43 ++++++++--------
 llvm/lib/AsmParser/LLLexer.cpp                |  1 +
 llvm/lib/AsmParser/LLParser.cpp               | 49 ++++++++++++++++---
 llvm/lib/Bitcode/Reader/MetadataLoader.cpp    | 16 ++++--
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  8 ++-
 llvm/lib/IR/AsmWriter.cpp                     | 14 ++++--
 ...compileunit-conflicting-language-fields.ll |  4 ++
 .../dicompileunit-invalid-language.ll         | 22 +++++++++
 .../invalid-dicompileunit-missing-language.ll |  2 +-
 .../Bitcode/dwarf-source-language-name.ll     | 15 ++++++
 10 files changed, 136 insertions(+), 38 deletions(-)
 create mode 100644 llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
 create mode 100644 llvm/test/Assembler/dicompileunit-invalid-language.ll
 create mode 100644 llvm/test/Bitcode/dwarf-source-language-name.ll

diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index d976d40e5e956..6de99fe182ad9 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -488,27 +488,28 @@ enum Kind {
   SummaryID,  // ^42
 
   // String valued tokens (StrVal).
-  LabelStr,         // foo:
-  GlobalVar,        // @foo @"foo"
-  ComdatVar,        // $foo
-  LocalVar,         // %foo %"foo"
-  MetadataVar,      // !foo
-  StringConstant,   // "foo"
-  DwarfTag,         // DW_TAG_foo
-  DwarfAttEncoding, // DW_ATE_foo
-  DwarfVirtuality,  // DW_VIRTUALITY_foo
-  DwarfLang,        // DW_LANG_foo
-  DwarfCC,          // DW_CC_foo
-  EmissionKind,     // lineTablesOnly
-  NameTableKind,    // GNU
-  FixedPointKind,   // Fixed point
-  DwarfOp,          // DW_OP_foo
-  DIFlag,           // DIFlagFoo
-  DISPFlag,         // DISPFlagFoo
-  DwarfMacinfo,     // DW_MACINFO_foo
-  ChecksumKind,     // CSK_foo
-  DbgRecordType,    // dbg_foo
-  DwarfEnumKind,    // DW_APPLE_ENUM_KIND_foo
+  LabelStr,            // foo:
+  GlobalVar,           // @foo @"foo"
+  ComdatVar,           // $foo
+  LocalVar,            // %foo %"foo"
+  MetadataVar,         // !foo
+  StringConstant,      // "foo"
+  DwarfTag,            // DW_TAG_foo
+  DwarfAttEncoding,    // DW_ATE_foo
+  DwarfVirtuality,     // DW_VIRTUALITY_foo
+  DwarfLang,           // DW_LANG_foo
+  DwarfSourceLangName, // DW_LNAME_foo
+  DwarfCC,             // DW_CC_foo
+  EmissionKind,        // lineTablesOnly
+  NameTableKind,       // GNU
+  FixedPointKind,      // Fixed point
+  DwarfOp,             // DW_OP_foo
+  DIFlag,              // DIFlagFoo
+  DISPFlag,            // DISPFlagFoo
+  DwarfMacinfo,        // DW_MACINFO_foo
+  ChecksumKind,        // CSK_foo
+  DbgRecordType,       // dbg_foo
+  DwarfEnumKind,       // DW_APPLE_ENUM_KIND_foo
 
   // Type valued tokens (TyVal).
   Type,
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index f6937d38eb38c..50d1d4730007a 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -982,6 +982,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   DWKEYWORD(ATE, DwarfAttEncoding);
   DWKEYWORD(VIRTUALITY, DwarfVirtuality);
   DWKEYWORD(LANG, DwarfLang);
+  DWKEYWORD(LNAME, DwarfSourceLangName);
   DWKEYWORD(CC, DwarfCC);
   DWKEYWORD(OP, DwarfOp);
   DWKEYWORD(MACINFO, DwarfMacinfo);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 55899660fa84a..380b19296a3c4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4740,6 +4740,10 @@ struct DwarfLangField : public MDUnsignedField {
   DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
 };
 
+struct DwarfSourceLangNameField : public MDUnsignedField {
+  DwarfSourceLangNameField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+
 struct DwarfCCField : public MDUnsignedField {
   DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {}
 };
@@ -4997,6 +5001,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
   return false;
 }
 
+template <>
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
+                            DwarfSourceLangNameField &Result) {
+  if (Lex.getKind() == lltok::APSInt)
+    return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+  if (Lex.getKind() != lltok::DwarfSourceLangName)
+    return tokError("expected DWARF source language name");
+
+  unsigned Lang = dwarf::getSourceLanguageName(Lex.getStrVal());
+  if (!Lang)
+    return tokError("invalid DWARF source language name" + Twine(" '") +
+                    Lex.getStrVal() + "'");
+  assert(Lang <= Result.Max && "Expected valid DWARF source language name");
+  Result.assign(Lang);
+  Lex.Lex();
+  return false;
+}
+
 template <>
 bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
   if (Lex.getKind() == lltok::APSInt)
@@ -5836,9 +5859,12 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   if (!IsDistinct)
     return tokError("missing 'distinct', required for !DICompileUnit");
 
+  LocTy Loc = Lex.getLoc();
+
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
-  REQUIRED(language, DwarfLangField, );                                        \
   REQUIRED(file, MDField, (/* AllowNull */ false));                            \
+  OPTIONAL(language, DwarfLangField, );                                        \
+  OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, );                    \
   OPTIONAL(producer, MDStringField, );                                         \
   OPTIONAL(isOptimized, MDBoolField, );                                        \
   OPTIONAL(flags, MDStringField, );                                            \
@@ -5860,12 +5886,23 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
+  if (!language.Seen && !sourceLanguageName.Seen)
+    return error(Loc, "missing one of 'language' or 'sourceLanguageName', "
+                      "required for !DICompileUnit");
+
+  if (language.Seen && sourceLanguageName.Seen)
+    return error(Loc, "can only specify one of 'language' and "
+                      "'sourceLanguageName' on !DICompileUnit");
+
   Result = DICompileUnit::getDistinct(
-      Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
-      isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
-      emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
-      macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
-      nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
+      Context,
+      language.Seen ? DISourceLanguageName(language.Val)
+                    : DISourceLanguageName(sourceLanguageName.Val, 0),
+      file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
+      splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
+      globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
+      debugInfoForProfiling.Val, nameTableKind.Val, rangesBaseAddress.Val,
+      sysroot.Val, sdk.Val);
   return false;
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index a4d1b8372dfac..cdcf7a80ffac7 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1867,12 +1867,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     // distinct.  It's always distinct.
     IsDistinct = true;
 
+    const auto LangVersionMask = (uint64_t(1) << 63);
+    const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+
     auto *CU = DICompileUnit::getDistinct(
-        Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
-        getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
-        getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
-        getMDOrNull(Record[10]), getMDOrNull(Record[12]),
-        getMDOrNull(Record[13]),
+        Context,
+        HasVersionedLanguage
+            ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+            : DISourceLanguageName(Record[1]),
+        getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
+        getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
+        getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+        getMDOrNull(Record[12]), getMDOrNull(Record[13]),
         Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
         Record.size() <= 14 ? 0 : Record[14],
         Record.size() <= 16 ? true : Record[16],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7ed140d392fca..0ca55a26f8013 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2108,7 +2108,13 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
   assert(N->isDistinct() && "Expected distinct compile units");
   Record.push_back(/* IsDistinct */ true);
 
-  Record.push_back(N->getSourceLanguage().getUnversionedName());
+  auto Lang = N->getSourceLanguage();
+  Record.push_back(Lang.getName());
+  // Set bit so the MetadataLoader can distinguish between versioned and
+  // unversioned names.
+  if (Lang.hasVersionedName())
+    Record.back() ^= (uint64_t(1) << 63);
+
   Record.push_back(VE.getMetadataOrNullID(N->getFile()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
   Record.push_back(N->isOptimized());
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd3902d..0bc877d1c4471 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2370,10 +2370,16 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
   Out << "!DICompileUnit(";
   MDFieldPrinter Printer(Out, WriterCtx);
 
-  Printer.printDwarfEnum("language",
-                         N->getSourceLanguage().getUnversionedName(),
-                         dwarf::LanguageString,
-                         /* ShouldSkipZero */ false);
+  auto Lang = N->getSourceLanguage();
+  if (Lang.hasVersionedName())
+    Printer.printDwarfEnum(
+        "sourceLanguageName",
+        static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+        dwarf::SourceLanguageNameString,
+        /* ShouldSkipZero */ false);
+  else
+    Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
+                           /* ShouldSkipZero */ false);
 
   Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
   Printer.printString("producer", N->getProducer());
diff --git a/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
new file mode 100644
index 0000000000000..3aad27ba8a87a
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:15: error: can only specify one of 'language' and 'sourceLanguageName' on !DICompileUnit
+!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageName: DW_LNAME_C, file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Assembler/dicompileunit-invalid-language.ll b/llvm/test/Assembler/dicompileunit-invalid-language.ll
new file mode 100644
index 0000000000000..da93c4f20e333
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-invalid-language.ll
@@ -0,0 +1,22 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as < %t/invalid_dw_lang.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG
+; RUN: not llvm-as < %t/invalid_dw_lang_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG_2
+; RUN: not llvm-as < %t/invalid_dw_lname.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME
+; RUN: not llvm-as < %t/invalid_dw_lname_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME_2
+
+; INVALID_DW_LANG:    invalid DWARF language 'DW_LANG_blah'
+; INVALID_DW_LANG_2:  expected DWARF language
+; INVALID_DW_LNAME:   invalid DWARF source language name 'DW_LNAME_blah'
+; INVALID_DW_LNAME_2: expected DWARF source language name
+
+;--- invalid_dw_lang.ll
+!0 = distinct !DICompileUnit(language: DW_LANG_blah)
+
+;--- invalid_dw_lang_2.ll
+!0 = distinct !DICompileUnit(language: DW_LNAME_C)
+
+;--- invalid_dw_lname.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_blah)
+
+;--- invalid_dw_lname_2.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LANG_C)
diff --git a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
index 8e4cb0261dbbf..ebc86e3410d66 100644
--- a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
+++ b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
@@ -1,4 +1,4 @@
 ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
 
-; CHECK: <stdin>:[[@LINE+1]]:74: error: missing required field 'language'
+; CHECK: <stdin>:[[@LINE+1]]:15: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit
 !0 = distinct !DICompileUnit(file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Bitcode/dwarf-source-language-name.ll b/llvm/test/Bitcode/dwarf-source-language-name.ll
new file mode 100644
index 0000000000000..e9893638f61b5
--- /dev/null
+++ b/llvm/test/Bitcode/dwarf-source-language-name.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: sourceLanguageName: DW_LNAME_ObjC_plus_plus
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!1 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}



More information about the llvm-commits mailing list