[clang] [clang][DebugInfo] Emit DW_AT_language_{name, version} for DWARFv6 (PR #163208)
Michael Buch via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 15 12:54:22 PDT 2025
https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/163208
>From 532be3831f51937b38a8dbff4187585099f69afa Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 7 Oct 2025 09:52:19 +0100
Subject: [PATCH 1/4] [clang][DebugInfo] Emit DW_AT_language_name for DWARFv6
---
clang/lib/CodeGen/CGDebugInfo.cpp | 134 ++++++++++++++----
.../test/DebugInfo/CXX/versioned-language.cpp | 23 +++
.../DebugInfo/Generic/versioned-language.c | 17 +++
.../test/DebugInfo/ObjC/versioned-language.m | 9 ++
.../DebugInfo/ObjCXX/versioned-language.mm | 9 ++
5 files changed, 162 insertions(+), 30 deletions(-)
create mode 100644 clang/test/DebugInfo/CXX/versioned-language.cpp
create mode 100644 clang/test/DebugInfo/Generic/versioned-language.c
create mode 100644 clang/test/DebugInfo/ObjC/versioned-language.m
create mode 100644 clang/test/DebugInfo/ObjCXX/versioned-language.mm
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 9fe9a13610296..79079ccb216cd 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -41,8 +41,10 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -647,6 +649,77 @@ StringRef CGDebugInfo::getCurrentDirname() {
return CGM.getCodeGenOpts().DebugCompilationDir;
}
+static llvm::dwarf::SourceLanguage GetSourceLanguage(const CodeGenModule &CGM) {
+ const CodeGenOptions &CGO = CGM.getCodeGenOpts();
+ const LangOptions &LO = CGM.getLangOpts();
+
+ assert(CGO.DwarfVersion <= 5);
+
+ llvm::dwarf::SourceLanguage LangTag;
+ if (LO.CPlusPlus) {
+ if (LO.ObjC)
+ LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
+ else if (CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
+ else if (LO.CPlusPlus14)
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14;
+ else if (LO.CPlusPlus11)
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11;
+ else
+ LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
+ } else if (LO.ObjC) {
+ LangTag = llvm::dwarf::DW_LANG_ObjC;
+ } else if (LO.OpenCL && (!CGO.DebugStrictDwarf || CGO.DwarfVersion >= 5)) {
+ LangTag = llvm::dwarf::DW_LANG_OpenCL;
+ } else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) {
+ LangTag = llvm::dwarf::DW_LANG_C11;
+ } else if (LO.C99) {
+ LangTag = llvm::dwarf::DW_LANG_C99;
+ } else {
+ LangTag = llvm::dwarf::DW_LANG_C89;
+ }
+
+ return LangTag;
+}
+
+static std::pair<llvm::dwarf::SourceLanguageName, uint32_t>
+GetSourceLanguageName(const CodeGenModule &CGM) {
+ const CodeGenOptions &CGO = CGM.getCodeGenOpts();
+ const LangOptions &LO = CGM.getLangOpts();
+
+ assert(CGO.DwarfVersion >= 6);
+
+ uint32_t LangVersion = 0;
+ llvm::dwarf::SourceLanguageName LangTag;
+ if (LO.CPlusPlus) {
+ if (LO.ObjC) {
+ LangTag = llvm::dwarf::DW_LNAME_ObjC_plus_plus;
+ } else {
+ LangTag = llvm::dwarf::DW_LNAME_C_plus_plus;
+ LangVersion = LO.getCPlusPlusLangStd().value_or(0);
+ }
+ } else if (LO.ObjC) {
+ LangTag = llvm::dwarf::DW_LNAME_ObjC;
+ } else if (LO.OpenCL) {
+ LangTag = llvm::dwarf::DW_LNAME_OpenCL_C;
+ } else {
+ LangTag = llvm::dwarf::DW_LNAME_C;
+ LangVersion = LO.getCLangStd().value_or(0);
+ }
+
+ return {LangTag, LangVersion};
+}
+
+static llvm::DISourceLanguageName
+GetDISourceLanguageName(const CodeGenModule &CGM) {
+ // Emit pre-DWARFv6 language codes.
+ if (CGM.getCodeGenOpts().DwarfVersion < 6)
+ return llvm::DISourceLanguageName(GetSourceLanguage(CGM));
+
+ auto [LName, LVersion] = GetSourceLanguageName(CGM);
+ return llvm::DISourceLanguageName(LName, LVersion);
+}
+
void CGDebugInfo::CreateCompileUnit() {
SmallString<64> Checksum;
std::optional<llvm::DIFile::ChecksumKind> CSKind;
@@ -702,31 +775,6 @@ void CGDebugInfo::CreateCompileUnit() {
}
}
- llvm::dwarf::SourceLanguage LangTag;
- if (LO.CPlusPlus) {
- if (LO.ObjC)
- LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
- else if (CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)
- LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
- else if (LO.CPlusPlus14)
- LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14;
- else if (LO.CPlusPlus11)
- LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11;
- else
- LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
- } else if (LO.ObjC) {
- LangTag = llvm::dwarf::DW_LANG_ObjC;
- } else if (LO.OpenCL && (!CGM.getCodeGenOpts().DebugStrictDwarf ||
- CGM.getCodeGenOpts().DwarfVersion >= 5)) {
- LangTag = llvm::dwarf::DW_LANG_OpenCL;
- } else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) {
- LangTag = llvm::dwarf::DW_LANG_C11;
- } else if (LO.C99) {
- LangTag = llvm::dwarf::DW_LANG_C99;
- } else {
- LangTag = llvm::dwarf::DW_LANG_C89;
- }
-
std::string Producer = getClangFullVersion();
// Figure out which version of the ObjC runtime we have.
@@ -787,7 +835,7 @@ void CGDebugInfo::CreateCompileUnit() {
// Create new compile unit.
TheCU = DBuilder.createCompileUnit(
- llvm::DISourceLanguageName(LangTag), CUFile,
+ GetDISourceLanguageName(CGM), CUFile,
CGOpts.EmitVersionIdentMetadata ? Producer : "",
CGOpts.OptimizationLevel != 0 || CGOpts.PrepareForLTO ||
CGOpts.PrepareForThinLTO,
@@ -1234,20 +1282,46 @@ llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
Ty->getPointeeType(), Unit);
}
-/// \return whether a C++ mangling exists for the type defined by TD.
-static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
- switch (TheCU->getSourceLanguage().getUnversionedName()) {
+static bool hasCXXMangling(llvm::dwarf::SourceLanguage Lang, bool IsTagDecl) {
+ switch (Lang) {
case llvm::dwarf::DW_LANG_C_plus_plus:
case llvm::dwarf::DW_LANG_C_plus_plus_11:
case llvm::dwarf::DW_LANG_C_plus_plus_14:
return true;
case llvm::dwarf::DW_LANG_ObjC_plus_plus:
- return isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD);
+ return IsTagDecl;
default:
return false;
}
}
+static bool hasCXXMangling(llvm::dwarf::SourceLanguageName Lang,
+ bool IsTagDecl) {
+ switch (Lang) {
+ case llvm::dwarf::DW_LNAME_C_plus_plus:
+ return true;
+ case llvm::dwarf::DW_LNAME_ObjC_plus_plus:
+ return IsTagDecl;
+ default:
+ return false;
+ }
+}
+
+/// \return whether a C++ mangling exists for the type defined by TD.
+static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
+ const bool IsTagDecl = isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD);
+
+ if (llvm::DISourceLanguageName SourceLang = TheCU->getSourceLanguage();
+ SourceLang.hasVersionedName())
+ return hasCXXMangling(
+ static_cast<llvm::dwarf::SourceLanguageName>(SourceLang.getName()),
+ IsTagDecl);
+ else
+ return hasCXXMangling(
+ static_cast<llvm::dwarf::SourceLanguage>(SourceLang.getName()),
+ IsTagDecl);
+}
+
// Determines if the debug info for this tag declaration needs a type
// identifier. The purpose of the unique identifier is to deduplicate type
// information for identical types across TUs. Because of the C++ one definition
diff --git a/clang/test/DebugInfo/CXX/versioned-language.cpp b/clang/test/DebugInfo/CXX/versioned-language.cpp
new file mode 100644
index 0000000000000..a57531412db68
--- /dev/null
+++ b/clang/test/DebugInfo/CXX/versioned-language.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=5 -std=c++98 \
+// RUN: | FileCheck %s --implicit-check-not "sourceLanguageName" --implicit-check-not "sourceLanguageVersion"
+//
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++98 | FileCheck %s --check-prefix=CHECK-CPP98
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++03 | FileCheck %s --check-prefix=CHECK-CPP03
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++11 | FileCheck %s --check-prefix=CHECK-CPP11
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++14 | FileCheck %s --check-prefix=CHECK-CPP14
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++17 | FileCheck %s --check-prefix=CHECK-CPP17
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++20 | FileCheck %s --check-prefix=CHECK-CPP20
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++23 | FileCheck %s --check-prefix=CHECK-CPP23
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++2c | FileCheck %s --check-prefix=CHECK-CPP2C
+
+int globalVar = 10;
+
+// CHECK-CPP98: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 199711
+// FIXME: C++03 technically has no official standard version code. From Clang's point of view C++03 and C++98 are interchangable.
+// CHECK-CPP03: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 199711
+// CHECK-CPP11: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 201103
+// CHECK-CPP14: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 201402
+// CHECK-CPP17: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 201703
+// CHECK-CPP20: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 202002
+// CHECK-CPP23: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 202302
+// CHECK-CPP2C: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 202400
diff --git a/clang/test/DebugInfo/Generic/versioned-language.c b/clang/test/DebugInfo/Generic/versioned-language.c
new file mode 100644
index 0000000000000..1faa7b4b56d4e
--- /dev/null
+++ b/clang/test/DebugInfo/Generic/versioned-language.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=5 -std=c99 \
+// RUN: | FileCheck %s --implicit-check-not "sourceLanguageName" --implicit-check-not "sourceLanguageVersion"
+//
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c89 | FileCheck %s --check-prefix=CHECK-C89 --implicit-check-not "sourceLanguageVersion"
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c99 | FileCheck %s --check-prefix=CHECK-C99
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c17 | FileCheck %s --check-prefix=CHECK-C17
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c23 | FileCheck %s --check-prefix=CHECK-C23
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c2y | FileCheck %s --check-prefix=CHECK-C2Y
+
+int globalVar = 10;
+
+// CHECK-C89: !DICompileUnit(sourceLanguageName: DW_LNAME_C,
+// CHECK-C99: !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 199901
+// CHECK-C11: !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 201112
+// CHECK-C17: !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 201710
+// CHECK-C23: !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 202311
+// CHECK-C2Y: !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 202400
diff --git a/clang/test/DebugInfo/ObjC/versioned-language.m b/clang/test/DebugInfo/ObjC/versioned-language.m
new file mode 100644
index 0000000000000..178c47bf8c841
--- /dev/null
+++ b/clang/test/DebugInfo/ObjC/versioned-language.m
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=5 \
+// RUN: | FileCheck %s --implicit-check-not "sourceLanguageName" --implicit-check-not "sourceLanguageVersion"
+//
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 \
+// RUN: | FileCheck %s --implicit-check-not "sourceLanguageVersion" --check-prefix=CHECK-OBJC
+
+int globalVar = 10;
+
+// CHECK-OBJC: !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC,
diff --git a/clang/test/DebugInfo/ObjCXX/versioned-language.mm b/clang/test/DebugInfo/ObjCXX/versioned-language.mm
new file mode 100644
index 0000000000000..bfdce462b2bf1
--- /dev/null
+++ b/clang/test/DebugInfo/ObjCXX/versioned-language.mm
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=5 \
+// RUN: | FileCheck %s --implicit-check-not "sourceLanguageName" --implicit-check-not "sourceLanguageVersion"
+//
+// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 \
+// RUN: | FileCheck %s --implicit-check-not "sourceLanguageVersion" --check-prefix=CHECK-OBJCXX
+
+int globalVar = 10;
+
+// CHECK-OBJCXX: !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus,
>From aec58c8879fe536856d1dea0706248cbc2254527 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 15 Oct 2025 17:03:01 +0100
Subject: [PATCH 2/4] fixup! remove redundant includes
---
clang/lib/CodeGen/CGDebugInfo.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 79079ccb216cd..b8533d17e02ef 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -41,10 +41,8 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
>From 18773c6b7ba403061c54992c90bb8dad5f4c3385 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 15 Oct 2025 20:52:10 +0100
Subject: [PATCH 3/4] fixup! exercise hasCXXMangling codepath
---
clang/test/DebugInfo/CXX/versioned-language.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/DebugInfo/CXX/versioned-language.cpp b/clang/test/DebugInfo/CXX/versioned-language.cpp
index a57531412db68..4cb2b29086035 100644
--- a/clang/test/DebugInfo/CXX/versioned-language.cpp
+++ b/clang/test/DebugInfo/CXX/versioned-language.cpp
@@ -10,7 +10,7 @@
// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++23 | FileCheck %s --check-prefix=CHECK-CPP23
// RUN: %clang_cc1 -emit-llvm %s -o - -debug-info-kind=limited -dwarf-version=6 -std=c++2c | FileCheck %s --check-prefix=CHECK-CPP2C
-int globalVar = 10;
+struct Foo {} globalVar;
// CHECK-CPP98: !DICompileUnit(sourceLanguageName: DW_LNAME_C_plus_plus, sourceLanguageVersion: 199711
// FIXME: C++03 technically has no official standard version code. From Clang's point of view C++03 and C++98 are interchangable.
>From b064dc349b942dc24bb28dde8f49b7120de4fa7b Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 15 Oct 2025 20:54:01 +0100
Subject: [PATCH 4/4] fixup! inline GetSourceLanguageName
---
clang/lib/CodeGen/CGDebugInfo.cpp | 23 +++++++----------------
1 file changed, 7 insertions(+), 16 deletions(-)
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index b8533d17e02ef..3a656d39a0094 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -680,12 +680,13 @@ static llvm::dwarf::SourceLanguage GetSourceLanguage(const CodeGenModule &CGM) {
return LangTag;
}
-static std::pair<llvm::dwarf::SourceLanguageName, uint32_t>
-GetSourceLanguageName(const CodeGenModule &CGM) {
- const CodeGenOptions &CGO = CGM.getCodeGenOpts();
- const LangOptions &LO = CGM.getLangOpts();
+static llvm::DISourceLanguageName
+GetDISourceLanguageName(const CodeGenModule &CGM) {
+ // Emit pre-DWARFv6 language codes.
+ if (CGM.getCodeGenOpts().DwarfVersion < 6)
+ return llvm::DISourceLanguageName(GetSourceLanguage(CGM));
- assert(CGO.DwarfVersion >= 6);
+ const LangOptions &LO = CGM.getLangOpts();
uint32_t LangVersion = 0;
llvm::dwarf::SourceLanguageName LangTag;
@@ -705,17 +706,7 @@ GetSourceLanguageName(const CodeGenModule &CGM) {
LangVersion = LO.getCLangStd().value_or(0);
}
- return {LangTag, LangVersion};
-}
-
-static llvm::DISourceLanguageName
-GetDISourceLanguageName(const CodeGenModule &CGM) {
- // Emit pre-DWARFv6 language codes.
- if (CGM.getCodeGenOpts().DwarfVersion < 6)
- return llvm::DISourceLanguageName(GetSourceLanguage(CGM));
-
- auto [LName, LVersion] = GetSourceLanguageName(CGM);
- return llvm::DISourceLanguageName(LName, LVersion);
+ return llvm::DISourceLanguageName(LangTag, LangVersion);
}
void CGDebugInfo::CreateCompileUnit() {
More information about the cfe-commits
mailing list