[llvm] bec0879 - [Demangle] Add support for D symbols back referencing
Luís Ferreira via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 12 14:01:50 PST 2022
Author: Luís Ferreira
Date: 2022-01-12T21:57:31Z
New Revision: bec08795db0d9eeeb38514fdb8fdc8e9c99c3324
URL: https://github.com/llvm/llvm-project/commit/bec08795db0d9eeeb38514fdb8fdc8e9c99c3324
DIFF: https://github.com/llvm/llvm-project/commit/bec08795db0d9eeeb38514fdb8fdc8e9c99c3324.diff
LOG: [Demangle] Add support for D symbols back referencing
This patch adds support for identifier back referencing allowing compressed
mangled names by avoiding repetitiveness.
Signed-off-by: Luís Ferreira <contact at lsferreira.net>
Reviewed By: dblaikie
Differential Revision: https://reviews.llvm.org/D111417
Added:
Modified:
llvm/lib/Demangle/DLangDemangle.cpp
llvm/unittests/Demangle/DLangDemangleTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index 93156f44d597b..086c76557f5b9 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -70,6 +70,41 @@ struct Demangler {
/// \see https://dlang.org/spec/abi.html#Number .
const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+ /// Extract the back reference position from a given string.
+ ///
+ /// \param Mangled string to extract the back reference position.
+ /// \param Ret assigned result value.
+ ///
+ /// \return the remaining string on success or nullptr on failure.
+ ///
+ /// \note Ret is always > 0 on success, and unspecified on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#back_ref .
+ /// \see https://dlang.org/spec/abi.html#NumberBackRef .
+ const char *decodeBackrefPos(const char *Mangled, long &Ret);
+
+ /// Extract the symbol pointed to by the back reference from a given string.
+ ///
+ /// \param Mangled string to extract the back reference position.
+ /// \param Ret assigned result value.
+ ///
+ /// \return the remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#back_ref .
+ const char *decodeBackref(const char *Mangled, const char *&Ret);
+
+ /// Extract and demangle backreferenced symbol from a given mangled symbol
+ /// and append it to the output string.
+ ///
+ /// \param Demangled output buffer to write the demangled name.
+ /// \param Mangled mangled symbol to be demangled.
+ ///
+ /// \return the remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#back_ref .
+ /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
+ const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
+
/// Check whether it is the beginning of a symbol name.
///
/// \param Mangled string to extract the symbol name.
@@ -156,12 +191,108 @@ const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
return Mangled;
}
+const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
+ // Return nullptr if trying to extract something that isn't a letter
+ if (Mangled == nullptr || !std::isalpha(*Mangled))
+ return nullptr;
+
+ // Any identifier or non-basic type that has been emitted to the mangled
+ // symbol before will not be emitted again, but is referenced by a special
+ // sequence encoding the relative position of the original occurrence in the
+ // mangled symbol name.
+ // Numbers in back references are encoded with base 26 by upper case letters
+ // A-Z for higher digits but lower case letters a-z for the last digit.
+ // NumberBackRef:
+ // [a-z]
+ // [A-Z] NumberBackRef
+ // ^
+ unsigned long Val = 0;
+
+ while (std::isalpha(*Mangled)) {
+ // Fail (return nullptr below) if Val * 26 + 25 would overflow unsigned long
+ if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
+ break;
+
+ Val *= 26;
+
+ if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
+ Val += Mangled[0] - 'a';
+ if ((long)Val <= 0)
+ break;
+ Ret = Val;
+ return Mangled + 1;
+ }
+
+ Val += Mangled[0] - 'A';
+ ++Mangled;
+ }
+
+ return nullptr;
+}
+
+const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
+ assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
+ Ret = nullptr;
+
+ // Position of 'Q'; the decoded offset is applied relative to it
+ const char *Qpos = Mangled;
+ long RefPos;
+ ++Mangled;
+
+ Mangled = decodeBackrefPos(Mangled, RefPos);
+ if (Mangled == nullptr)
+ return nullptr;
+ // Reject offsets reaching before Str (presumably the start of the input)
+ if (RefPos > Qpos - Str)
+ return nullptr;
+
+ // The referenced text starts RefPos characters before 'Q'.
+ Ret = Qpos - RefPos;
+
+ return Mangled;
+}
+
+const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
+ const char *Mangled) {
+ // An identifier back reference always points to a digit 0 to 9.
+ // IdentifierBackRef:
+ // Q NumberBackRef
+ // ^
+ const char *Backref;
+ unsigned long Len;
+
+ // Resolve the back reference; Backref is left as nullptr on failure
+ Mangled = decodeBackref(Mangled, Backref);
+
+ // Must point to a length-prefixed identifier; read its length into Len
+ Backref = decodeNumber(Backref, &Len);
+ if (Backref == nullptr || strlen(Backref) < Len)
+ return nullptr;
+
+ Backref = parseLName(Demangled, Backref, Len);
+ if (Backref == nullptr)
+ return nullptr;
+
+ return Mangled;
+}
+
bool Demangler::isSymbolName(const char *Mangled) {
+ long Ret;
+ const char *Qref = Mangled;
+
if (std::isdigit(*Mangled))
return true;
- // TODO: Handle symbol back references and template instances.
- return false;
+ // TODO: Handle template instances.
+
+ if (*Mangled != 'Q')
+ return false;
+
+ Mangled = decodeBackrefPos(Mangled + 1, Ret);
+ if (Mangled == nullptr || Ret > Qref - Str)
+ return false;
+
+ return std::isdigit(Qref[-Ret]);
}
const char *Demangler::parseMangle(OutputBuffer *Demangled,
@@ -237,7 +368,10 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
if (Mangled == nullptr || *Mangled == '\0')
return nullptr;
- // TODO: Parse back references and lengthless template instances.
+ if (*Mangled == 'Q')
+ return parseSymbolBackref(Demangled, Mangled);
+
+ // TODO: Parse lengthless template instances.
const char *Endptr = decodeNumber(Mangled, &Len);
diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp
index e5bd4c4602149..d76bf36c14598 100644
--- a/llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -52,4 +52,15 @@ INSTANTIATE_TEST_SUITE_P(
std::make_pair("_D8demangle3foo",
nullptr), // symbol without a type sequence.
std::make_pair("_D8demangle3fooinvalidtypeseq",
- nullptr))); // invalid type sequence.
+ nullptr), // invalid type sequence.
+ std::make_pair(
+ "_D8demangle3ABCQe1ai",
+ "demangle.ABC.ABC.a"), // symbol back reference: `Qe` is a back
+ // reference with offset 4 ('e' - 'a') counted
+ // back from `Q`, so it points to `3`. Since
+ // `3` is a length, the 3 chars `ABC` are read
+ // and demangling succeeds.
+ std::make_pair("_D8demangle3ABCQa1ai",
+ nullptr), // invalid symbol back reference (recursive).
+ std::make_pair("_D8demangleQDXXXXXXXXXXXXx",
+ nullptr))); // overflow back reference position.
More information about the llvm-commits
mailing list