[llvm] e63c799 - [Demangle] Add support for D simple single qualified names
David Blaikie via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 29 16:06:27 PST 2021
Author: David Blaikie
Date: 2021-11-29T16:05:32-08:00
New Revision: e63c799a767b0f682af62eba9d1d375c59e58627
URL: https://github.com/llvm/llvm-project/commit/e63c799a767b0f682af62eba9d1d375c59e58627
DIFF: https://github.com/llvm/llvm-project/commit/e63c799a767b0f682af62eba9d1d375c59e58627.diff
LOG: [Demangle] Add support for D simple single qualified names
This patch adds support for simple single qualified names, which include
internal mangled names and normal symbol names.
Differential Revision: https://reviews.llvm.org/D111415
Added:
Modified:
llvm/lib/Demangle/DLangDemangle.cpp
llvm/unittests/Demangle/DLangDemangleTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index d2f1bf4323ee5..73d5ce1907e94 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,12 +14,214 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"
#include <cstring>
+#include <limits>
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+namespace {
+
+/// Demangle information structure.
+///
+/// Holds the mangled string being processed and exposes a single public entry
+/// point, parseMangle(OutputBuffer *); the remaining parsers are private
+/// helpers that each consume a prefix of the string they are given.
+struct Demangler {
+  /// Initialize the information structure we use to pass around information.
+  ///
+  /// The constructor is explicit so that a C string is never silently
+  /// converted into a Demangler.
+  ///
+  /// \param Mangled String to demangle.
+  explicit Demangler(const char *Mangled);
+
+  /// Extract and demangle the mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled);
+
+private:
+  /// Extract and demangle a given mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract the number from a given string.
+  ///
+  /// \param Mangled String to extract the number from.
+  /// \param Ret Assigned the decoded value; written only on success.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \note A result larger than UINT_MAX is considered a failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#Number .
+  const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+  /// Extract and demangle an identifier from a given mangled symbol and append
+  /// it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName .
+  const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract and demangle the plain identifier from a given mangled symbol and
+  /// prepend/append it to the output string, with a special treatment for some
+  /// magic compiler generated symbols.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  /// \param Len Length of the mangled symbol name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#LName .
+  const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
+                         unsigned long Len);
+
+  /// Extract and demangle the qualified symbol from a given mangled symbol and
+  /// append it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#QualifiedName .
+  const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+
+  /// The string we are demangling.
+  const char *Str;
+};
+
+} // namespace
+
+/// Decode the run of decimal digits at the start of \p Mangled.
+///
+/// Returns nullptr, leaving \p Ret untouched, when \p Mangled is null, does not
+/// start with a digit, the value would exceed UINT_MAX, or the digits are the
+/// last characters of the string. Otherwise stores the value in \p Ret and
+/// returns a pointer to the first character after the digits.
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+  // Test against the literal '0'..'9' range instead of calling std::isdigit:
+  // handing std::isdigit a negative plain char is undefined behaviour, and a
+  // mangled name may contain arbitrary bytes.
+  const auto IsDigit = [](char C) { return C >= '0' && C <= '9'; };
+
+  // Return nullptr if trying to extract something that isn't a digit.
+  if (Mangled == nullptr || !IsDigit(*Mangled))
+    return nullptr;
+
+  unsigned long Val = 0;
+
+  do {
+    unsigned long Digit = Mangled[0] - '0';
+
+    // Check for overflow: reject before Val * 10 + Digit could pass UINT_MAX.
+    if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
+      return nullptr;
+
+    Val = Val * 10 + Digit;
+    ++Mangled;
+  } while (IsDigit(*Mangled));
+
+  // A number must be followed by something; a string ending in digits fails.
+  if (*Mangled == '\0')
+    return nullptr;
+
+  *Ret = Val;
+  return Mangled;
+}
+
+/// Demangle a whole symbol: skip the two-character prefix, parse the qualified
+/// name, then require the trailing 'Z' marker.
+const char *Demangler::parseMangle(OutputBuffer *Demangled,
+                                   const char *Mangled) {
+  // A D mangled symbol is comprised of both scope and type information.
+  //    MangleName:
+  //        _D QualifiedName Type
+  //        _D QualifiedName Z
+  //        ^
+  // The caller should have guaranteed that the start pointer is at the
+  // above location, so the two prefix characters are skipped unexamined.
+  // Note that type is never a function type, but only the return type of
+  // a function or the type of a variable.
+  const char *Rest = parseQualified(Demangled, Mangled + 2);
+  if (Rest == nullptr)
+    return nullptr;
+
+  // Artificial symbols end with 'Z' and have no type; anything else would be
+  // a type, which is not handled yet.
+  // TODO: Implement symbols with types.
+  if (*Rest != 'Z')
+    return nullptr;
+
+  return Rest + 1;
+}
+
+/// Demangle a qualified name. Currently this forwards to parseIdentifier for a
+/// single identifier.
+const char *Demangler::parseQualified(OutputBuffer *Demangled,
+                                      const char *Mangled) {
+  // Qualified names are identifiers separated by their encoded length.
+  // Nested functions also encode their argument types without specifying
+  // what they return.
+  //    QualifiedName:
+  //        SymbolFunctionName
+  //        SymbolFunctionName QualifiedName
+  //        ^
+  //    SymbolFunctionName:
+  //        SymbolName
+  //        SymbolName TypeFunctionNoReturn
+  //        SymbolName M TypeFunctionNoReturn
+  //        SymbolName M TypeModifiers TypeFunctionNoReturn
+  // The start pointer should be at the above location.
+
+  // TODO: Parse multiple identifiers
+
+  const char *Rest = parseIdentifier(Demangled, Mangled);
+  return Rest;
+}
+
+/// Demangle one length-prefixed identifier: decode the length, check that this
+/// many characters remain, then hand the name to parseLName.
+const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
+                                       const char *Mangled) {
+  // There is nothing to parse in a null or empty string.
+  if (!Mangled || !*Mangled)
+    return nullptr;
+
+  // TODO: Parse back references and lengthless template instances.
+
+  unsigned long NameLen;
+  const char *AfterLen = decodeNumber(Mangled, &NameLen);
+
+  // Reject a missing length, a zero length, and a length that runs past the
+  // end of the string. NameLen is only read once decodeNumber has succeeded.
+  if (!AfterLen || NameLen == 0 || strlen(AfterLen) < NameLen)
+    return nullptr;
+
+  // TODO: Parse template instances with a length prefix.
+
+  return parseLName(Demangled, AfterLen, NameLen);
+}
+
+/// Append the first \p Len characters of \p Mangled to the output buffer and
+/// return the position just past them.
+const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
+                                  unsigned long Len) {
+  const StringView Name(Mangled, Len);
+  *Demangled << Name;
+
+  return Mangled + Len;
+}
+
+Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
+
+/// Public entry point: demangle the string this object was constructed with.
+const char *Demangler::parseMangle(OutputBuffer *Demangled) {
+  const char *Rest = parseMangle(Demangled, Str);
+  return Rest;
+}
char *llvm::dlangDemangle(const char *MangledName) {
if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
@@ -29,8 +231,19 @@ char *llvm::dlangDemangle(const char *MangledName) {
if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
return nullptr;
- if (strcmp(MangledName, "_Dmain") == 0)
+ if (strcmp(MangledName, "_Dmain") == 0) {
Demangled << "D main";
+ } else {
+
+ Demangler D = Demangler(MangledName);
+ MangledName = D.parseMangle(&Demangled);
+
+ // Check that the entire symbol was successfully demangled.
+ if (MangledName == nullptr || *MangledName != '\0') {
+ std::free(Demangled.getBuffer());
+ return nullptr;
+ }
+ }
// OutputBuffer's internal buffer is not null terminated and therefore we need
// to add it to comply with C null terminated strings.
@@ -40,6 +253,6 @@ char *llvm::dlangDemangle(const char *MangledName) {
return Demangled.getBuffer();
}
- free(Demangled.getBuffer());
+ std::free(Demangled.getBuffer());
return nullptr;
}
diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp
index 8a324ecd23219..ad814cb404ec1 100644
--- a/llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -30,4 +30,7 @@ INSTANTIATE_TEST_SUITE_P(DLangDemangleTest, DLangDemangleTestFixture,
testing::Values(std::make_pair("_Dmain", "D main"),
std::make_pair(nullptr, nullptr),
std::make_pair("_Z", nullptr),
- std::make_pair("_DDD", nullptr)));
+ std::make_pair("_DDD", nullptr),
+ std::make_pair("_D88", nullptr),
+ std::make_pair("_D8demangleZ",
+ "demangle")));
More information about the llvm-commits
mailing list