[llvm] [llvm][Dwarf] Add LanguageDescription API that accounts for version (PR #162048)
Michael Buch via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 09:56:02 PDT 2025
https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/162048
>From 845c1c97e774f76be079af4ed4462abb1ff14a87 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Mon, 6 Oct 2025 09:37:51 +0100
Subject: [PATCH 1/6] [llvm][Dwarf] Add LanguageDescription API that accounts
for version
Currently `llvm::dwarf::LanguageDescription` returns a stringified
`DW_LNAME`. It would be useful to have an API that returns the language
name for a particular `DW_LNAME_`/version pair. LLDB's use case is that
it wants to display a human-readable description of the language we got
from debug-info in diagnostics. We could maintain a side-table in LLDB,
but I thought this would be generally useful living next to the
`LanguageDescription` API.
---
llvm/include/llvm/BinaryFormat/Dwarf.h | 5 ++
llvm/lib/BinaryFormat/Dwarf.cpp | 111 +++++++++++++++++++++++++
2 files changed, 116 insertions(+)
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index ba74ab9515a75..dea8b485027fe 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -500,8 +500,13 @@ toDW_LNAME(SourceLanguage language) {
return {};
}
+/// Returns a version-independent language name.
LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName name);
+/// Returns a language name corresponding to the specified version.
+LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName Name,
+ uint32_t Version);
+
inline bool isCPlusPlus(SourceLanguage S) {
bool result = false;
// Deliberately enumerate all the language options so we get a warning when
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 9690ff9107df8..e48df7035e1e7 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -472,6 +472,117 @@ StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName lname) {
return "Unknown";
}
+StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName Name,
+ uint32_t Version) {
+ switch (Name) {
+ // YYYY
+ case DW_LNAME_Ada: {
+ if (Version <= 1983)
+ return "Ada 83";
+ if (Version <= 1995)
+ return "Ada 95";
+ if (Version <= 2005)
+ return "Ada 2005";
+ if (Version <= 2012)
+ return "Ada 2012";
+ } break;
+
+ case DW_LNAME_Cobol: {
+ if (Version <= 1974)
+ return "COBOL-74";
+ if (Version <= 1985)
+ return "COBOL-85";
+ } break;
+
+ case DW_LNAME_Fortran: {
+ if (Version <= 1977)
+ return "FORTRAN 77";
+ if (Version <= 1990)
+ return "FORTRAN 90";
+ if (Version <= 1995)
+ return "Fortran 95";
+ if (Version <= 2003)
+ return "Fortran 2003";
+ if (Version <= 2008)
+ return "Fortran 2008";
+ if (Version <= 2018)
+ return "Fortran 2018";
+ } break;
+
+ // YYYYMM
+ case DW_LNAME_C: {
+ if (Version == 0)
+ return "K&R C";
+ if (Version <= 198912)
+ return "C89";
+ if (Version <= 199901)
+ return "C99";
+ if (Version <= 201112)
+ return "C11";
+ if (Version <= 201710)
+ return "C17";
+ } break;
+
+ case DW_LNAME_C_plus_plus: {
+ if (Version == 0)
+ break;
+ if (Version <= 199711)
+ return "C++98";
+ if (Version <= 200310)
+ return "C++03";
+ if (Version <= 201103)
+ return "C++11";
+ if (Version <= 201402)
+ return "C++14";
+ if (Version <= 201703)
+ return "C++17";
+ if (Version <= 202002)
+ return "C++20";
+ } break;
+
+ case DW_LNAME_ObjC_plus_plus:
+ case DW_LNAME_ObjC:
+ case DW_LNAME_Move:
+ case DW_LNAME_SYCL:
+ case DW_LNAME_BLISS:
+ case DW_LNAME_Crystal:
+ case DW_LNAME_D:
+ case DW_LNAME_Dylan:
+ case DW_LNAME_Go:
+ case DW_LNAME_Haskell:
+ case DW_LNAME_HLSL:
+ case DW_LNAME_Java:
+ case DW_LNAME_Julia:
+ case DW_LNAME_Kotlin:
+ case DW_LNAME_Modula2:
+ case DW_LNAME_Modula3:
+ case DW_LNAME_OCaml:
+ case DW_LNAME_OpenCL_C:
+ case DW_LNAME_Pascal:
+ case DW_LNAME_PLI:
+ case DW_LNAME_Python:
+ case DW_LNAME_RenderScript:
+ case DW_LNAME_Rust:
+ case DW_LNAME_Swift:
+ case DW_LNAME_UPC:
+ case DW_LNAME_Zig:
+ case DW_LNAME_Assembly:
+ case DW_LNAME_C_sharp:
+ case DW_LNAME_Mojo:
+ case DW_LNAME_GLSL:
+ case DW_LNAME_GLSL_ES:
+ case DW_LNAME_OpenCL_CPP:
+ case DW_LNAME_CPP_for_OpenCL:
+ case DW_LNAME_Ruby:
+ case DW_LNAME_Hylo:
+ case DW_LNAME_Metal:
+ break;
+ }
+
+ // Fallback to un-versioned name.
+ return LanguageDescription(Name);
+}
+
llvm::StringRef llvm::dwarf::SourceLanguageNameString(SourceLanguageName Lang) {
switch (Lang) {
#define HANDLE_DW_LNAME(ID, NAME, DESC, LOWER_BOUND) \
>From 335d824a418a4918efba7b0e1056a7217a0785d7 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Mon, 6 Oct 2025 09:46:51 +0100
Subject: [PATCH 2/6] fixup! expand docs
---
llvm/include/llvm/BinaryFormat/Dwarf.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index dea8b485027fe..815e85ddd7a92 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -504,6 +504,8 @@ toDW_LNAME(SourceLanguage language) {
LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName name);
/// Returns a language name corresponding to the specified version.
+/// If the version is not recognized for the specified language, returns
+/// the version-independent name.
LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName Name,
uint32_t Version);
>From 427784dde0a1a1f926f6589c292a622a4ffb7c77 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Mon, 6 Oct 2025 09:59:36 +0100
Subject: [PATCH 3/6] fixup! handle C version 0
---
llvm/lib/BinaryFormat/Dwarf.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index e48df7035e1e7..969047a8e5b5c 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -512,7 +512,7 @@ StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName Name,
// YYYYMM
case DW_LNAME_C: {
if (Version == 0)
- return "K&R C";
+ break;
if (Version <= 198912)
return "C89";
if (Version <= 199901)
>From 1c62bcc3eea2f4da3899d622abd06f0d91548cdf Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Mon, 6 Oct 2025 18:15:21 +0100
Subject: [PATCH 4/6] fixup! unittests
---
.../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 42 +++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index 373a58d259af5..ae7960e948801 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -2276,4 +2276,46 @@ TEST(DWARFDebugInfo, TestDWARF64UnitLength) {
});
}
+TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) {
+ // Tests for the llvm::dwarf::LanguageDescription API that
+ // takes a name *and* a version.
+
+ // Unknown language.
+ EXPECT_EQ(
+ llvm::dwarf::LanguageDescription(static_cast<SourceLanguageName>(0)),
+ "Unknown");
+
+ // Test that specifying an invalid version falls back to a valid language name
+ // regardless.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(
+ static_cast<SourceLanguageName>(DW_LNAME_ObjC), 0),
+ "Objective C");
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(
+ static_cast<SourceLanguageName>(DW_LNAME_Julia), 0),
+ "Julia");
+
+ // Check some versions.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711),
+ "C++98");
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402),
+ "C++14");
+
+ // Versions round up.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400),
+ "C++14");
+
+ // Version 0 for C and C++ is an unversioned name.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(
+ static_cast<SourceLanguageName>(DW_LNAME_C), 0),
+ "C (K&R and ISO)");
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(
+ static_cast<SourceLanguageName>(DW_LNAME_C_plus_plus), 0),
+ "ISO C++");
+
+ // Version 0 for other versioned languages may not be the unversioned name.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(
+ static_cast<SourceLanguageName>(DW_LNAME_Fortran), 0),
+ "FORTRAN 77");
+}
+
} // end anonymous namespace
>From 924d1118c223706287a3284e74eaf497e8112cce Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 8 Oct 2025 08:43:01 +0100
Subject: [PATCH 5/6] fixup! move tests, remove redundant casts
---
llvm/unittests/BinaryFormat/DwarfTest.cpp | 38 ++++++++++++++++
.../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 43 -------------------
2 files changed, 38 insertions(+), 43 deletions(-)
diff --git a/llvm/unittests/BinaryFormat/DwarfTest.cpp b/llvm/unittests/BinaryFormat/DwarfTest.cpp
index 1162eb709aa83..f4519f61adf85 100644
--- a/llvm/unittests/BinaryFormat/DwarfTest.cpp
+++ b/llvm/unittests/BinaryFormat/DwarfTest.cpp
@@ -254,4 +254,42 @@ TEST(DwarfTest, lname_SourceLanguageNameString) {
EXPECT_EQ(SourceLanguageNameString(DW_LNAME_##NAME), xstr(DW_LNAME_##NAME));
#include "llvm/BinaryFormat/Dwarf.def"
}
+
+TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) {
+ // Tests for the llvm::dwarf::LanguageDescription API that
+ // takes a name *and* a version.
+
+ // Unknown language.
+ EXPECT_EQ(
+ llvm::dwarf::LanguageDescription(static_cast<SourceLanguageName>(0)),
+ "Unknown");
+
+ EXPECT_EQ(
+ llvm::dwarf::LanguageDescription(static_cast<SourceLanguageName>(0), 0),
+ "Unknown");
+
+ // Test that specifying an invalid version falls back to a valid language name
+ // regardless.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_ObjC, 0), "Objective C");
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_Julia, 0), "Julia");
+
+ // Check some versions.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711),
+ "C++98");
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402),
+ "C++14");
+
+ // Versions round up.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400),
+ "C++14");
+
+ // Version 0 for C and C++ is an unversioned name.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C, 0), "C (K&R and ISO)");
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 0),
+ "ISO C++");
+
+ // Version 0 for other versioned languages may not be the unversioned name.
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_Fortran, 0),
+ "FORTRAN 77");
+}
} // end namespace
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index ae7960e948801..49773196129a8 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -2275,47 +2275,4 @@ TEST(DWARFDebugInfo, TestDWARF64UnitLength) {
ASSERT_EQ(0x1122334455667788ULL, CU.getLength());
});
}
-
-TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) {
- // Tests for the llvm::dwarf::LanguageDescription API that
- // takes a name *and* a version.
-
- // Unknown language.
- EXPECT_EQ(
- llvm::dwarf::LanguageDescription(static_cast<SourceLanguageName>(0)),
- "Unknown");
-
- // Test that specifying an invalid version falls back to a valid language name
- // regardless.
- EXPECT_EQ(llvm::dwarf::LanguageDescription(
- static_cast<SourceLanguageName>(DW_LNAME_ObjC), 0),
- "Objective C");
- EXPECT_EQ(llvm::dwarf::LanguageDescription(
- static_cast<SourceLanguageName>(DW_LNAME_Julia), 0),
- "Julia");
-
- // Check some versions.
- EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711),
- "C++98");
- EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402),
- "C++14");
-
- // Versions round up.
- EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400),
- "C++14");
-
- // Version 0 for C and C++ is an unversioned name.
- EXPECT_EQ(llvm::dwarf::LanguageDescription(
- static_cast<SourceLanguageName>(DW_LNAME_C), 0),
- "C (K&R and ISO)");
- EXPECT_EQ(llvm::dwarf::LanguageDescription(
- static_cast<SourceLanguageName>(DW_LNAME_C_plus_plus), 0),
- "ISO C++");
-
- // Version 0 for other versioned languages may not be the unversioned name.
- EXPECT_EQ(llvm::dwarf::LanguageDescription(
- static_cast<SourceLanguageName>(DW_LNAME_Fortran), 0),
- "FORTRAN 77");
-}
-
} // end anonymous namespace
>From 70e3ed1094cb73bb2d2852b4a9e296e5358675db Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 8 Oct 2025 17:55:49 +0100
Subject: [PATCH 6/6] fixup! add back newline
---
llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index 49773196129a8..373a58d259af5 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -2275,4 +2275,5 @@ TEST(DWARFDebugInfo, TestDWARF64UnitLength) {
ASSERT_EQ(0x1122334455667788ULL, CU.getLength());
});
}
+
} // end anonymous namespace
More information about the llvm-commits
mailing list