[llvm] [llvm][DebugInfo] Add 'sourceLanguageName' field support to DICompileUnit (PR #162445)
Michael Buch via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 10:33:40 PDT 2025
https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/162445
>From 84cf912dd019edb79e069383f159fb8121312473 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 7 Oct 2025 09:51:52 +0100
Subject: [PATCH] [llvm][DebugInfo] Support for versioned DISourceLanguageName
---
llvm/include/llvm/AsmParser/LLToken.h | 43 ++++++++--------
llvm/lib/AsmParser/LLLexer.cpp | 1 +
llvm/lib/AsmParser/LLParser.cpp | 49 ++++++++++++++++---
llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 16 ++++--
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 8 ++-
llvm/lib/IR/AsmWriter.cpp | 14 ++++--
...compileunit-conflicting-language-fields.ll | 4 ++
.../dicompileunit-invalid-language.ll | 22 +++++++++
.../invalid-dicompileunit-missing-language.ll | 2 +-
.../Bitcode/dwarf-source-language-name.ll | 15 ++++++
10 files changed, 136 insertions(+), 38 deletions(-)
create mode 100644 llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
create mode 100644 llvm/test/Assembler/dicompileunit-invalid-language.ll
create mode 100644 llvm/test/Bitcode/dwarf-source-language-name.ll
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index d976d40e5e956..6de99fe182ad9 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -488,27 +488,28 @@ enum Kind {
SummaryID, // ^42
// String valued tokens (StrVal).
- LabelStr, // foo:
- GlobalVar, // @foo @"foo"
- ComdatVar, // $foo
- LocalVar, // %foo %"foo"
- MetadataVar, // !foo
- StringConstant, // "foo"
- DwarfTag, // DW_TAG_foo
- DwarfAttEncoding, // DW_ATE_foo
- DwarfVirtuality, // DW_VIRTUALITY_foo
- DwarfLang, // DW_LANG_foo
- DwarfCC, // DW_CC_foo
- EmissionKind, // lineTablesOnly
- NameTableKind, // GNU
- FixedPointKind, // Fixed point
- DwarfOp, // DW_OP_foo
- DIFlag, // DIFlagFoo
- DISPFlag, // DISPFlagFoo
- DwarfMacinfo, // DW_MACINFO_foo
- ChecksumKind, // CSK_foo
- DbgRecordType, // dbg_foo
- DwarfEnumKind, // DW_APPLE_ENUM_KIND_foo
+ LabelStr, // foo:
+ GlobalVar, // @foo @"foo"
+ ComdatVar, // $foo
+ LocalVar, // %foo %"foo"
+ MetadataVar, // !foo
+ StringConstant, // "foo"
+ DwarfTag, // DW_TAG_foo
+ DwarfAttEncoding, // DW_ATE_foo
+ DwarfVirtuality, // DW_VIRTUALITY_foo
+ DwarfLang, // DW_LANG_foo
+ DwarfSourceLangName, // DW_LNAME_foo
+ DwarfCC, // DW_CC_foo
+ EmissionKind, // lineTablesOnly
+ NameTableKind, // GNU
+ FixedPointKind, // Fixed point
+ DwarfOp, // DW_OP_foo
+ DIFlag, // DIFlagFoo
+ DISPFlag, // DISPFlagFoo
+ DwarfMacinfo, // DW_MACINFO_foo
+ ChecksumKind, // CSK_foo
+ DbgRecordType, // dbg_foo
+ DwarfEnumKind, // DW_APPLE_ENUM_KIND_foo
// Type valued tokens (TyVal).
Type,
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index f6937d38eb38c..50d1d4730007a 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -982,6 +982,7 @@ lltok::Kind LLLexer::LexIdentifier() {
DWKEYWORD(ATE, DwarfAttEncoding);
DWKEYWORD(VIRTUALITY, DwarfVirtuality);
DWKEYWORD(LANG, DwarfLang);
+ DWKEYWORD(LNAME, DwarfSourceLangName);
DWKEYWORD(CC, DwarfCC);
DWKEYWORD(OP, DwarfOp);
DWKEYWORD(MACINFO, DwarfMacinfo);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 55899660fa84a..380b19296a3c4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4740,6 +4740,10 @@ struct DwarfLangField : public MDUnsignedField {
DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
};
+struct DwarfSourceLangNameField : public MDUnsignedField {
+ DwarfSourceLangNameField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+
struct DwarfCCField : public MDUnsignedField {
DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {}
};
@@ -4997,6 +5001,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
return false;
}
+template <>
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
+ DwarfSourceLangNameField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfSourceLangName)
+ return tokError("expected DWARF source language name");
+
+ unsigned Lang = dwarf::getSourceLanguageName(Lex.getStrVal());
+ if (!Lang)
+ return tokError("invalid DWARF source language name" + Twine(" '") +
+ Lex.getStrVal() + "'");
+ assert(Lang <= Result.Max && "Expected valid DWARF source language name");
+ Result.assign(Lang);
+ Lex.Lex();
+ return false;
+}
+
template <>
bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
if (Lex.getKind() == lltok::APSInt)
@@ -5836,9 +5859,12 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
if (!IsDistinct)
return tokError("missing 'distinct', required for !DICompileUnit");
+ LocTy Loc = Lex.getLoc();
+
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(language, DwarfLangField, ); \
REQUIRED(file, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(language, DwarfLangField, ); \
+ OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, ); \
OPTIONAL(producer, MDStringField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(flags, MDStringField, ); \
@@ -5860,12 +5886,23 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
+ if (!language.Seen && !sourceLanguageName.Seen)
+ return error(Loc, "missing one of 'language' or 'sourceLanguageName', "
+ "required for !DICompileUnit");
+
+ if (language.Seen && sourceLanguageName.Seen)
+ return error(Loc, "can only specify one of 'language' and "
+ "'sourceLanguageName' on !DICompileUnit");
+
Result = DICompileUnit::getDistinct(
- Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
- isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
- emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
- macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
- nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
+ Context,
+ language.Seen ? DISourceLanguageName(language.Val)
+ : DISourceLanguageName(sourceLanguageName.Val, 0),
+ file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
+ splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
+ globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
+ debugInfoForProfiling.Val, nameTableKind.Val, rangesBaseAddress.Val,
+ sysroot.Val, sdk.Val);
return false;
}
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index a4d1b8372dfac..cdcf7a80ffac7 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1867,12 +1867,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// distinct. It's always distinct.
IsDistinct = true;
+ const auto LangVersionMask = (uint64_t(1) << 63);
+ const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+
auto *CU = DICompileUnit::getDistinct(
- Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
- getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
- getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
- getMDOrNull(Record[10]), getMDOrNull(Record[12]),
- getMDOrNull(Record[13]),
+ Context,
+ HasVersionedLanguage
+ ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+ : DISourceLanguageName(Record[1]),
+ getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
+ getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ getMDOrNull(Record[12]), getMDOrNull(Record[13]),
Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
Record.size() <= 14 ? 0 : Record[14],
Record.size() <= 16 ? true : Record[16],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7ed140d392fca..0ca55a26f8013 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2108,7 +2108,13 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
assert(N->isDistinct() && "Expected distinct compile units");
Record.push_back(/* IsDistinct */ true);
- Record.push_back(N->getSourceLanguage().getUnversionedName());
+ auto Lang = N->getSourceLanguage();
+ Record.push_back(Lang.getName());
+ // Set bit so the MetadataLoader can distniguish between versioned and
+ // unversioned names.
+ if (Lang.hasVersionedName())
+ Record.back() ^= (uint64_t(1) << 63);
+
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
Record.push_back(N->isOptimized());
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd3902d..0bc877d1c4471 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2370,10 +2370,16 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Out << "!DICompileUnit(";
MDFieldPrinter Printer(Out, WriterCtx);
- Printer.printDwarfEnum("language",
- N->getSourceLanguage().getUnversionedName(),
- dwarf::LanguageString,
- /* ShouldSkipZero */ false);
+ auto Lang = N->getSourceLanguage();
+ if (Lang.hasVersionedName())
+ Printer.printDwarfEnum(
+ "sourceLanguageName",
+ static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+ dwarf::SourceLanguageNameString,
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
+ /* ShouldSkipZero */ false);
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printString("producer", N->getProducer());
diff --git a/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
new file mode 100644
index 0000000000000..3aad27ba8a87a
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:15: error: can only specify one of 'language' and 'sourceLanguageName' on !DICompileUnit
+!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageName: DW_LNAME_C, file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Assembler/dicompileunit-invalid-language.ll b/llvm/test/Assembler/dicompileunit-invalid-language.ll
new file mode 100644
index 0000000000000..da93c4f20e333
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-invalid-language.ll
@@ -0,0 +1,22 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as < %t/invalid_dw_lang.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG
+; RUN: not llvm-as < %t/invalid_dw_lang_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG_2
+; RUN: not llvm-as < %t/invalid_dw_lname.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME
+; RUN: not llvm-as < %t/invalid_dw_lname_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME_2
+
+; INVALID_DW_LANG: invalid DWARF language 'DW_LANG_blah'
+; INVALID_DW_LANG_2: expected DWARF language
+; INVALID_DW_LNAME: invalid DWARF source language name 'DW_LNAME_blah'
+; INVALID_DW_LNAME_2: expected DWARF source language name
+
+;--- invalid_dw_lang.ll
+!0 = distinct !DICompileUnit(language: DW_LANG_blah)
+
+;--- invalid_dw_lang_2.ll
+!0 = distinct !DICompileUnit(language: DW_LNAME_C)
+
+;--- invalid_dw_lname.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_blah)
+
+;--- invalid_dw_lname_2.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LANG_C)
diff --git a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
index 8e4cb0261dbbf..ebc86e3410d66 100644
--- a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
+++ b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
@@ -1,4 +1,4 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-; CHECK: <stdin>:[[@LINE+1]]:74: error: missing required field 'language'
+; CHECK: <stdin>:[[@LINE+1]]:15: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit
!0 = distinct !DICompileUnit(file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Bitcode/dwarf-source-language-name.ll b/llvm/test/Bitcode/dwarf-source-language-name.ll
new file mode 100644
index 0000000000000..e9893638f61b5
--- /dev/null
+++ b/llvm/test/Bitcode/dwarf-source-language-name.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: sourceLanguageName: DW_LNAME_ObjC_plus_plus
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!1 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
More information about the llvm-commits
mailing list