[libcxx-commits] [libcxxabi] [llvm] [demangle] Represent a char array initializer as a string literal. (PR #109021)
Richard Smith via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Sep 17 10:35:04 PDT 2024
https://github.com/zygoloid created https://github.com/llvm/llvm-project/pull/109021
This improves the demangling for non-type template arguments that
contain string literals. Previously we'd produce
`char [4]{(char)65, (char)66, (char)67}`
(which isn't valid C or C++), and now we produce `"ABC"`.
The new demangling is always shorter, even when using an escape sequence
for every character, and much more readable when the char array contains
text.
From 2ab0b4068b6d51949fd2e0ab5ebda4eb708cf526 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Tue, 17 Sep 2024 17:27:33 +0000
Subject: [PATCH] [demangle] Represent a char array initializer as a string
literal.
This improves the demangling for non-type template arguments that
contain string literals. Previously we'd produce
char [4]{(char)65, (char)66, (char)67}
(which isn't valid C or C++), and now we produce
"ABC"
The new demangling is always shorter, even when using an escape sequence
for every character, and much more readable when the char array contains
text.
---
libcxxabi/src/demangle/ItaniumDemangle.h | 116 ++++++++++++++++++-
libcxxabi/test/test_demangle.pass.cpp | 27 ++++-
llvm/include/llvm/Demangle/ItaniumDemangle.h | 116 ++++++++++++++++++-
3 files changed, 256 insertions(+), 3 deletions(-)
diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 3b041efe3aac00..8032c893fbae6e 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -156,6 +156,8 @@ template <class T, size_t N> class PODSmallVector {
}
};
+class NodeArray;
+
// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
@@ -293,6 +295,13 @@ class Node {
// implementation.
virtual void printRight(OutputBuffer &) const {}
+ // Print an initializer list of this type. Returns true if a custom
+ // representation was printed; returns false if nothing was printed and the
+ // caller should use the default representation. This base version declines.
+ virtual bool printInitListAsType(OutputBuffer &, const NodeArray &) const {
+ return false;
+ }
+
virtual std::string_view getBaseName() const { return {}; }
// Silence compiler warnings, this dtor will never be called.
@@ -339,6 +348,10 @@ class NodeArray {
FirstElement = false;
}
}
+
+ // Print an array of integer literals as a string literal. Returns whether we
+ // could do so; on failure OB is rewound, so nothing has been printed.
+ bool printAsString(OutputBuffer &OB) const;
};
struct NodeArrayNode : Node {
@@ -796,6 +809,15 @@ class ArrayType final : public Node {
OB += "]";
Base->printRight(OB);
}
+
+  // An initializer list for an array of `char` is printed as a string literal.
+  bool printInitListAsType(OutputBuffer &OB,
+                           const NodeArray &Elements) const override {
+    const bool IsCharArray =
+        Base->getKind() == KNameType &&
+        static_cast<const NameType *>(Base)->getName() == "char";
+    return IsCharArray && Elements.printAsString(OB);
+  }
};
class FunctionType final : public Node {
@@ -2225,8 +2247,11 @@ class InitListExpr : public Node {
template<typename Fn> void match(Fn F) const { F(Ty, Inits); }
void printLeft(OutputBuffer &OB) const override {
- if (Ty)
+ if (Ty) {
+ if (Ty->printInitListAsType(OB, Inits))
+ return;
Ty->print(OB);
+ }
OB += '{';
Inits.printWithComma(OB);
OB += '}';
@@ -2433,6 +2458,8 @@ class IntegerLiteral : public Node {
if (Type.size() <= 3)
OB += Type;
}
+
+ std::string_view value() const { return Value; } // Raw stored value text.
};
class RequiresExpr : public Node {
@@ -2604,6 +2631,93 @@ template<typename NodeT> struct NodeKind;
};
#include "ItaniumNodes.def"
+// Print this array as a double-quoted string literal. Succeeds only if every
+// element is an IntegerLiteral spelled as decimal digits with value <= 255;
+// otherwise OB is rewound to where it started and false is returned. Marked
+// inline because this header-defined function may be seen by several TUs.
+inline bool NodeArray::printAsString(OutputBuffer &OB) const {
+  auto Fail = [&OB, StartPos = OB.getCurrentPosition()] {
+    OB.setCurrentPosition(StartPos);
+    return false;
+  };
+
+  OB += '"';
+  bool LastWasNumericEscape = false;
+  for (const Node *Element : *this) {
+    if (Element->getKind() != Node::KIntegerLiteral)
+      return Fail();
+    int integer_value = 0;
+    for (char c : static_cast<const IntegerLiteral *>(Element)->value()) {
+      // Past 25 another digit would exceed 255, so bail before multiplying.
+      if (c < '0' || c > '9' || integer_value > 25)
+        return Fail();
+      integer_value *= 10;
+      integer_value += c - '0';
+    }
+    if (integer_value > 255)
+      return Fail();
+
+    // Insert a `""` to avoid accidentally extending a numeric escape.
+    if (LastWasNumericEscape) {
+      if ((integer_value >= '0' && integer_value <= '9') ||
+          (integer_value >= 'a' && integer_value <= 'f') ||
+          (integer_value >= 'A' && integer_value <= 'F')) {
+        OB += "\"\"";
+      }
+    }
+    LastWasNumericEscape = false;
+    // Determine how to print this character.
+    switch (integer_value) {
+    case '\a':
+      OB += "\\a";
+      break;
+    case '\b':
+      OB += "\\b";
+      break;
+    case '\f':
+      OB += "\\f";
+      break;
+    case '\n':
+      OB += "\\n";
+      break;
+    case '\r':
+      OB += "\\r";
+      break;
+    case '\t':
+      OB += "\\t";
+      break;
+    case '\v':
+      OB += "\\v";
+      break;
+    case '"':
+      OB += "\\\"";
+      break;
+    case '\\':
+      OB += "\\\\";
+      break;
+    default:
+      // We assume that the character is ASCII, and use a numeric escape for all
+      // remaining non-printable ASCII characters.
+      if (integer_value < 32 || integer_value == 127) {
+        constexpr char Hex[] = "0123456789ABCDEF";
+        OB += '\\';
+        if (integer_value > 7)
+          OB += 'x';
+        if (integer_value >= 16)
+          OB += Hex[integer_value >> 4];
+        OB += Hex[integer_value & 0xF];
+        LastWasNumericEscape = true;
+        break;
+      }
+      // Assume all remaining characters are directly printable.
+      OB += (char)integer_value;
+      break;
+    }
+  }
+  OB += '"';
+  return true;
+}
+
template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
const char *Last;
diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index 77f79e0d40e84f..c8d4ca8637e8da 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -30037,7 +30037,32 @@ const char* cases[][2] =
// FIXME: This is not valid pointer-to-member syntax.
{"_Z1fIXtl1DmcM7DerivedKiadL_ZN11MoreDerived1zEEn8EEEEvv", "void f<D{(int const Derived::*)(&MoreDerived::z)}>()"},
{"_Z1fIXtl1Edi1nLi42EEEEvv", "void f<E{.n = 42}>()"},
- {"_ZTAXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEE", "template parameter object for S{char [32]{(char)104, (char)101, (char)108, (char)108, (char)111, (char)32, (char)119, (char)111, (char)114, (char)108, (char)100}}"},
+ // Arrays of char are formatted as string literals. Escape sequences are
+ // used for non-printable ASCII characters.
+ // FIXME: We should do the same for arrays of charN_t and wchar_t.
+ {"_ZTAXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEE", "template parameter object for S{\"hello world\"}"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc108ELc108ELc111EEEEEvv", "void f<Hello{\"Hello\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc108ELc111EEEEEvv", "void f<Hello{\"Helo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc0ELc108ELc111EEEEEvv", "void f<Hello{\"He\\0lo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc1ELc108ELc111EEEEEvv", "void f<Hello{\"He\\1lo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc6ELc108ELc111EEEEEvv", "void f<Hello{\"He\\6lo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc7ELc108ELc111EEEEEvv", "void f<Hello{\"He\\alo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc8ELc108ELc111EEEEEvv", "void f<Hello{\"He\\blo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc9ELc108ELc111EEEEEvv", "void f<Hello{\"He\\tlo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc10ELc108ELc111EEEEEvv", "void f<Hello{\"He\\nlo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc11ELc108ELc111EEEEEvv", "void f<Hello{\"He\\vlo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc12ELc108ELc111EEEEEvv", "void f<Hello{\"He\\flo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc13ELc108ELc111EEEEEvv", "void f<Hello{\"He\\rlo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc14ELc108ELc111EEEEEvv", "void f<Hello{\"He\\xElo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc15ELc108ELc111EEEEEvv", "void f<Hello{\"He\\xFlo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc16ELc108ELc111EEEEEvv", "void f<Hello{\"He\\x10lo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc34ELc108ELc111EEEEEvv", "void f<Hello{\"He\\\"lo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc92ELc108ELc111EEEEEvv", "void f<Hello{\"He\\\\lo\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc15ELc101ELc108ELc108ELc111EEEEEvv", "void f<Hello{\"\\xF\"\"ello\"}>()"},
+ {"_Z1fIXtl5HellotlA6_cLc240ELc159ELc152ELc138ELc33EEEEEvv", "void f<Hello{\"😊!\"}>()"},
+ // Even non-null-terminated strings get this treatment, even though this
+ // isn't valid C++ syntax to initialize an array of char.
+ {"_Z1fIXtl5HellotlA5_cLc72ELc101ELc108ELc108ELc111EEEEEvv", "void f<Hello{\"Hello\"}>()"},
// FIXME: This is wrong; the S2_ backref should expand to OT_ and then to
// "double&&". But we can't cope with a substitution that represents a
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 0af0224bc83fa8..401dc4f5a4878c 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -156,6 +156,8 @@ template <class T, size_t N> class PODSmallVector {
}
};
+class NodeArray;
+
// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
@@ -293,6 +295,13 @@ class Node {
// implementation.
virtual void printRight(OutputBuffer &) const {}
+ // Print an initializer list of this type. Returns true if a custom
+ // representation was printed; returns false if nothing was printed and the
+ // caller should use the default representation. This base version declines.
+ virtual bool printInitListAsType(OutputBuffer &, const NodeArray &) const {
+ return false;
+ }
+
virtual std::string_view getBaseName() const { return {}; }
// Silence compiler warnings, this dtor will never be called.
@@ -339,6 +348,10 @@ class NodeArray {
FirstElement = false;
}
}
+
+ // Print an array of integer literals as a string literal. Returns whether we
+ // could do so; on failure OB is rewound, so nothing has been printed.
+ bool printAsString(OutputBuffer &OB) const;
};
struct NodeArrayNode : Node {
@@ -796,6 +809,15 @@ class ArrayType final : public Node {
OB += "]";
Base->printRight(OB);
}
+
+  // An initializer list for an array of `char` is printed as a string literal.
+  bool printInitListAsType(OutputBuffer &OB,
+                           const NodeArray &Elements) const override {
+    const bool IsCharArray =
+        Base->getKind() == KNameType &&
+        static_cast<const NameType *>(Base)->getName() == "char";
+    return IsCharArray && Elements.printAsString(OB);
+  }
};
class FunctionType final : public Node {
@@ -2225,8 +2247,11 @@ class InitListExpr : public Node {
template<typename Fn> void match(Fn F) const { F(Ty, Inits); }
void printLeft(OutputBuffer &OB) const override {
- if (Ty)
+ if (Ty) {
+ if (Ty->printInitListAsType(OB, Inits))
+ return;
Ty->print(OB);
+ }
OB += '{';
Inits.printWithComma(OB);
OB += '}';
@@ -2433,6 +2458,8 @@ class IntegerLiteral : public Node {
if (Type.size() <= 3)
OB += Type;
}
+
+ std::string_view value() const { return Value; } // Raw stored value text.
};
class RequiresExpr : public Node {
@@ -2604,6 +2631,93 @@ template<typename NodeT> struct NodeKind;
};
#include "ItaniumNodes.def"
+// Print this array as a double-quoted string literal. Succeeds only if every
+// element is an IntegerLiteral spelled as decimal digits with value <= 255;
+// otherwise OB is rewound to where it started and false is returned. Marked
+// inline because this header-defined function may be seen by several TUs.
+inline bool NodeArray::printAsString(OutputBuffer &OB) const {
+  auto Fail = [&OB, StartPos = OB.getCurrentPosition()] {
+    OB.setCurrentPosition(StartPos);
+    return false;
+  };
+
+  OB += '"';
+  bool LastWasNumericEscape = false;
+  for (const Node *Element : *this) {
+    if (Element->getKind() != Node::KIntegerLiteral)
+      return Fail();
+    int integer_value = 0;
+    for (char c : static_cast<const IntegerLiteral *>(Element)->value()) {
+      // Past 25 another digit would exceed 255, so bail before multiplying.
+      if (c < '0' || c > '9' || integer_value > 25)
+        return Fail();
+      integer_value *= 10;
+      integer_value += c - '0';
+    }
+    if (integer_value > 255)
+      return Fail();
+
+    // Insert a `""` to avoid accidentally extending a numeric escape.
+    if (LastWasNumericEscape) {
+      if ((integer_value >= '0' && integer_value <= '9') ||
+          (integer_value >= 'a' && integer_value <= 'f') ||
+          (integer_value >= 'A' && integer_value <= 'F')) {
+        OB += "\"\"";
+      }
+    }
+    LastWasNumericEscape = false;
+    // Determine how to print this character.
+    switch (integer_value) {
+    case '\a':
+      OB += "\\a";
+      break;
+    case '\b':
+      OB += "\\b";
+      break;
+    case '\f':
+      OB += "\\f";
+      break;
+    case '\n':
+      OB += "\\n";
+      break;
+    case '\r':
+      OB += "\\r";
+      break;
+    case '\t':
+      OB += "\\t";
+      break;
+    case '\v':
+      OB += "\\v";
+      break;
+    case '"':
+      OB += "\\\"";
+      break;
+    case '\\':
+      OB += "\\\\";
+      break;
+    default:
+      // We assume that the character is ASCII, and use a numeric escape for all
+      // remaining non-printable ASCII characters.
+      if (integer_value < 32 || integer_value == 127) {
+        constexpr char Hex[] = "0123456789ABCDEF";
+        OB += '\\';
+        if (integer_value > 7)
+          OB += 'x';
+        if (integer_value >= 16)
+          OB += Hex[integer_value >> 4];
+        OB += Hex[integer_value & 0xF];
+        LastWasNumericEscape = true;
+        break;
+      }
+      // Assume all remaining characters are directly printable.
+      OB += (char)integer_value;
+      break;
+    }
+  }
+  OB += '"';
+  return true;
+}
+
template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
const char *Last;
More information about the libcxx-commits
mailing list