[llvm] e63c799 - [Demangle] Add support for D simple single qualified names

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 29 16:06:27 PST 2021


Author: David Blaikie
Date: 2021-11-29T16:05:32-08:00
New Revision: e63c799a767b0f682af62eba9d1d375c59e58627

URL: https://github.com/llvm/llvm-project/commit/e63c799a767b0f682af62eba9d1d375c59e58627
DIFF: https://github.com/llvm/llvm-project/commit/e63c799a767b0f682af62eba9d1d375c59e58627.diff

LOG: [Demangle] Add support for D simple single qualified names

    This patch adds support for simple single qualified names, which include
    internal mangled names and normal symbol names.

Differential Revision: https://reviews.llvm.org/D111415

Added: 
    

Modified: 
    llvm/lib/Demangle/DLangDemangle.cpp
    llvm/unittests/Demangle/DLangDemangleTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index d2f1bf4323ee5..73d5ce1907e94 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,12 +14,214 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
 #include "llvm/Demangle/Utility.h"
 
 #include <cstring>
+#include <limits>
 
 using namespace llvm;
 using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+namespace {
+
+/// Demangle information structure.
+struct Demangler {
+  /// Create a demangler for the given string; only the pointer is stored.
+  ///
+  /// \param Mangled String to demangle.
+  Demangler(const char *Mangled);
+
+  /// Demangle the string this object was constructed with and append the
+  /// result to the output buffer.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled);
+
+private:
+  /// Extract and demangle a given mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled; must begin with "_D".
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract a decimal number from the start of a given string.
+  ///
+  /// \param Mangled String to extract the number from.
+  /// \param Ret Assigned the result value; written only on success.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \note A result above UINT_MAX, or a number that ends the string, fails.
+  ///
+  /// \see https://dlang.org/spec/abi.html#Number .
+  const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+  /// Extract and demangle an identifier from a given mangled symbol and append
+  /// it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName .
+  const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract the plain identifier from a given mangled symbol and append it
+  /// to the output string. Special treatment of magic compiler generated
+  /// symbols is not implemented yet; the name is appended verbatim.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  /// \param Len Length of the mangled symbol name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#LName .
+  const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
+                         unsigned long Len);
+
+  /// Extract and demangle the qualified symbol from a given mangled symbol and
+  /// append it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#QualifiedName .
+  const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+
+  /// The string we are demangling (not owned by this object).
+  const char *Str;
+};
+
+} // namespace
+
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+  // Fail on null or non-digit. NOTE(review): isdigit(char) can be UB if negative.
+  if (Mangled == nullptr || !std::isdigit(*Mangled))
+    return nullptr;
+
+  unsigned long Val = 0;
+
+  do {
+    unsigned long Digit = Mangled[0] - '0';
+
+    // Fail if appending this digit would push the value past UINT_MAX.
+    if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
+      return nullptr;
+
+    Val = Val * 10 + Digit;
+    ++Mangled;
+  } while (std::isdigit(*Mangled));
+
+  if (*Mangled == '\0') // A number that ends the string is rejected.
+    return nullptr;
+
+  *Ret = Val; // Ret is written only on success.
+  return Mangled;
+}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled,
+                                   const char *Mangled) {
+  // A D mangled symbol comprises both scope and type information.
+  //    MangleName:
+  //        _D QualifiedName Type
+  //        _D QualifiedName Z
+  //        ^
+  // The caller should have guaranteed that the start pointer is at the
+  // above location.
+  // Note that type is never a function type, but only the return type of
+  // a function or the type of a variable.
+  Mangled += 2; // Step over the "_D" prefix.
+
+  Mangled = parseQualified(Demangled, Mangled);
+
+  if (Mangled != nullptr) {
+    // Artificial symbols end with 'Z' and have no type.
+    if (*Mangled == 'Z')
+      ++Mangled;
+    else {
+      // TODO: Implement symbols with types; until then they are rejected.
+      return nullptr;
+    }
+  }
+
+  return Mangled; // nullptr here means parseQualified failed.
+}
+
+const char *Demangler::parseQualified(OutputBuffer *Demangled,
+                                      const char *Mangled) {
+  // Qualified names are identifiers separated by their encoded length.
+  // Nested functions also encode their argument types without specifying
+  // what they return.
+  //    QualifiedName:
+  //        SymbolFunctionName
+  //        SymbolFunctionName QualifiedName
+  //        ^
+  //    SymbolFunctionName:
+  //        SymbolName
+  //        SymbolName TypeFunctionNoReturn
+  //        SymbolName M TypeFunctionNoReturn
+  //        SymbolName M TypeModifiers TypeFunctionNoReturn
+  // The start pointer should be at the above location.
+
+  // TODO: Parse multiple identifiers; only a single one is handled for now.
+
+  return parseIdentifier(Demangled, Mangled);
+}
+
+const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
+                                       const char *Mangled) {
+  unsigned long Len; // Set by decodeNumber on success.
+
+  if (Mangled == nullptr || *Mangled == '\0')
+    return nullptr;
+
+  // TODO: Parse back references and lengthless template instances.
+
+  const char *Endptr = decodeNumber(Mangled, &Len); // Read the length prefix.
+
+  if (Endptr == nullptr || Len == 0) // No valid length, or an empty name.
+    return nullptr;
+
+  if (strlen(Endptr) < Len) // The name would run past the end of the string.
+    return nullptr;
+
+  Mangled = Endptr;
+
+  // TODO: Parse template instances with a length prefix.
+
+  return parseLName(Demangled, Mangled, Len);
+}
+
+const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
+                                  unsigned long Len) {
+  *Demangled << StringView(Mangled, Len); // Append the Len-character name.
+  Mangled += Len; // Advance past the consumed name.
+
+  return Mangled;
+}
+
+Demangler::Demangler(const char *Mangled) : Str(Mangled) {} // Pointer only.
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled) {
+  return parseMangle(Demangled, this->Str); // Delegate to the private overload.
+}
 
 char *llvm::dlangDemangle(const char *MangledName) {
   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
@@ -29,8 +231,19 @@ char *llvm::dlangDemangle(const char *MangledName) {
   if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
     return nullptr;
 
-  if (strcmp(MangledName, "_Dmain") == 0)
+  if (strcmp(MangledName, "_Dmain") == 0) {
     Demangled << "D main";
+  } else {
+
+    Demangler D = Demangler(MangledName);
+    MangledName = D.parseMangle(&Demangled);
+
+    // Check that the entire symbol was successfully demangled.
+    if (MangledName == nullptr || *MangledName != '\0') {
+      std::free(Demangled.getBuffer());
+      return nullptr;
+    }
+  }
 
   // OutputBuffer's internal buffer is not null terminated and therefore we need
   // to add it to comply with C null terminated strings.
@@ -40,6 +253,6 @@ char *llvm::dlangDemangle(const char *MangledName) {
     return Demangled.getBuffer();
   }
 
-  free(Demangled.getBuffer());
+  std::free(Demangled.getBuffer());
   return nullptr;
 }

diff  --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp
index 8a324ecd23219..ad814cb404ec1 100644
--- a/llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -30,4 +30,7 @@ INSTANTIATE_TEST_SUITE_P(DLangDemangleTest, DLangDemangleTestFixture,
                          testing::Values(std::make_pair("_Dmain", "D main"),
                                          std::make_pair(nullptr, nullptr),
                                          std::make_pair("_Z", nullptr),
-                                         std::make_pair("_DDD", nullptr)));
+                                         std::make_pair("_DDD", nullptr),
+                                         std::make_pair("_D88", nullptr),
+                                         std::make_pair("_D8demangleZ",
+                                                        "demangle")));


        


More information about the llvm-commits mailing list