[clang] f33c2c2 - Fix crash on `user defined literals`
Eduardo Caldas via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 10 09:21:23 PDT 2020
Author: Eduardo Caldas
Date: 2020-07-10T16:21:11Z
New Revision: f33c2c27a8d4ea831aa7c2c2649066be91318d85
URL: https://github.com/llvm/llvm-project/commit/f33c2c27a8d4ea831aa7c2c2649066be91318d85
DIFF: https://github.com/llvm/llvm-project/commit/f33c2c27a8d4ea831aa7c2c2649066be91318d85.diff
LOG: Fix crash on `user defined literals`
Summary:
Given a UserDefinedLiteral `1.2_w`:
Problem: Lexer generates one Token for the literal, but ClangAST
references two source locations
Fix: Ignore the operator and interpret it as the underlying literal.
e.g.: the `1.2_w` token generates a single user-defined-literal syntax node whose only child is the token `1.2_w`
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D82157
Added:
Modified:
clang/include/clang/Tooling/Syntax/Nodes.h
clang/lib/Tooling/Syntax/BuildTree.cpp
clang/lib/Tooling/Syntax/Nodes.cpp
clang/unittests/Tooling/Syntax/TreeTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
index 97605ceb76b7..fb63c36bc4cc 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -50,6 +50,11 @@ enum class NodeKind : uint16_t {
StringLiteralExpression,
BoolLiteralExpression,
CxxNullPtrExpression,
+ UnknownUserDefinedLiteralExpression,
+ IntegerUserDefinedLiteralExpression,
+ FloatUserDefinedLiteralExpression,
+ CharUserDefinedLiteralExpression,
+ StringUserDefinedLiteralExpression,
IdExpression,
// Statements.
@@ -325,6 +330,88 @@ class CxxNullPtrExpression final : public Expression {
syntax::Leaf *nullPtrKeyword();
};
+/// Expression for user-defined literal. C++ [lex.ext]
+/// user-defined-literal:
+/// user-defined-integer-literal
+/// user-defined-floating-point-literal
+/// user-defined-string-literal
+/// user-defined-character-literal
+///
+/// Common base of the five `*UserDefinedLiteralExpression` node kinds. The
+/// literal together with its suffix is a single token (e.g. `1.2_w`), which
+/// is exposed through `literalToken()`.
+class UserDefinedLiteralExpression : public Expression {
+public:
+ /// Forwards \p K to `Expression` as the node kind. \p K is expected to be
+ /// one of the kinds accepted by `classof` below.
+ UserDefinedLiteralExpression(NodeKind K) : Expression(K) {}
+ /// Returns true iff the kind of \p N is one of the five
+ /// user-defined-literal node kinds.
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression ||
+ N->kind() == NodeKind::IntegerUserDefinedLiteralExpression ||
+ N->kind() == NodeKind::FloatUserDefinedLiteralExpression ||
+ N->kind() == NodeKind::CharUserDefinedLiteralExpression ||
+ N->kind() == NodeKind::StringUserDefinedLiteralExpression;
+ }
+ /// Returns the child leaf with role `NodeRole::LiteralToken`; the result is
+ /// produced with `cast_or_null`, so it may be null.
+ syntax::Leaf *literalToken();
+};
+
+// We cannot yet distinguish between user-defined-integer-literal and
+// user-defined-floating-point-literal, when using raw literal operator or
+// numeric literal operator template. C++ [lex.ext]p3, p4
+/// Expression for an unknown user-defined-literal.
+/// Selected when the literal operator kind is `LOK_Raw` or `LOK_Template`,
+/// e.g. `12_r` with `operator "" _r(const char*)` or `1.2_t` with
+/// `template <char...> operator "" _t()`.
+class UnknownUserDefinedLiteralExpression final
+ : public UserDefinedLiteralExpression {
+public:
+ UnknownUserDefinedLiteralExpression()
+ : UserDefinedLiteralExpression(
+ NodeKind::UnknownUserDefinedLiteralExpression) {}
+ /// Matches only nodes whose kind is exactly this node kind.
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression;
+ }
+};
+
+/// Expression for user-defined-integer-literal. C++ [lex.ext]
+/// Selected when the literal operator kind is `LOK_Integer`, e.g. `12_i`
+/// with `operator "" _i(unsigned long long)`.
+class IntegerUserDefinedLiteralExpression final
+ : public UserDefinedLiteralExpression {
+public:
+ IntegerUserDefinedLiteralExpression()
+ : UserDefinedLiteralExpression(
+ NodeKind::IntegerUserDefinedLiteralExpression) {}
+ /// Matches only nodes whose kind is exactly this node kind.
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::IntegerUserDefinedLiteralExpression;
+ }
+};
+
+/// Expression for user-defined-floating-point-literal. C++ [lex.ext]
+/// Selected when the literal operator kind is `LOK_Floating`, e.g. `1.2_f`
+/// with `operator "" _f(long double)`.
+class FloatUserDefinedLiteralExpression final
+ : public UserDefinedLiteralExpression {
+public:
+ FloatUserDefinedLiteralExpression()
+ : UserDefinedLiteralExpression(
+ NodeKind::FloatUserDefinedLiteralExpression) {}
+ /// Matches only nodes whose kind is exactly this node kind.
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::FloatUserDefinedLiteralExpression;
+ }
+};
+
+/// Expression for user-defined-character-literal. C++ [lex.ext]
+/// Selected when the literal operator kind is `LOK_Character`, e.g. `'2'_c`
+/// with `operator "" _c(char)`.
+class CharUserDefinedLiteralExpression final
+ : public UserDefinedLiteralExpression {
+public:
+ CharUserDefinedLiteralExpression()
+ : UserDefinedLiteralExpression(
+ NodeKind::CharUserDefinedLiteralExpression) {}
+ /// Matches only nodes whose kind is exactly this node kind.
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::CharUserDefinedLiteralExpression;
+ }
+};
+
+/// Expression for user-defined-string-literal. C++ [lex.ext]
+/// Selected when the literal operator kind is `LOK_String`, e.g. `"12"_s`
+/// with `operator "" _s(const char*, size_t)`.
+class StringUserDefinedLiteralExpression final
+ : public UserDefinedLiteralExpression {
+public:
+ StringUserDefinedLiteralExpression()
+ : UserDefinedLiteralExpression(
+ NodeKind::StringUserDefinedLiteralExpression) {}
+ /// Matches only nodes whose kind is exactly this node kind.
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::StringUserDefinedLiteralExpression;
+ }
+};
+
/// An abstract class for prefix and postfix unary operators.
class UnaryOperatorExpression : public Expression {
public:
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index f9fdf47bff26..8204d3fc66f3 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -216,7 +216,8 @@ static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,
}
if (Initializer.isValid()) {
auto InitializerEnd = Initializer.getEnd();
- assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd);
+ assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) ||
+ End == InitializerEnd);
End = InitializerEnd;
}
return SourceRange(Start, End);
@@ -708,6 +709,42 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
return NNS;
}
+ /// Handles a `UserDefinedLiteral` without descending into its children:
+ /// the node is passed straight to `WalkUpFromUserDefinedLiteral` and that
+ /// call's result is returned.
+ bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) {
+ // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node
+ // referencing the location of the UDL suffix (`_w` in `1.2_w`). The
+ // UDL suffix location does not point to the beginning of a token, so we
+ // can't represent the UDL suffix as a separate syntax tree node.
+
+ return WalkUpFromUserDefinedLiteral(S);
+ }
+
+ /// Maps the literal operator kind of the semantic node \p S to the syntax
+ /// node kind used for it: integer, floating, character and string
+ /// operators get their dedicated kind; raw operators and operator
+ /// templates get `UnknownUserDefinedLiteralExpression`.
+ syntax::NodeKind getUserDefinedLiteralKind(UserDefinedLiteral *S) {
+ switch (S->getLiteralOperatorKind()) {
+ case clang::UserDefinedLiteral::LOK_Integer:
+ return syntax::NodeKind::IntegerUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_Floating:
+ return syntax::NodeKind::FloatUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_Character:
+ return syntax::NodeKind::CharUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_String:
+ return syntax::NodeKind::StringUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_Raw:
+ case clang::UserDefinedLiteral::LOK_Template:
+ // FIXME: Apply `NumericLiteralParser` to the underlying token to deduce
+ // the right UDL kind. That would require a `Preprocessor` though.
+ return syntax::NodeKind::UnknownUserDefinedLiteralExpression;
+ }
+ // Every case above returns. Without this, control could reach the end of
+ // a non-void function (-Wreturn-type), which is undefined behaviour if an
+ // unexpected enumerator value is ever seen.
+ llvm_unreachable("Unknown literal operator kind.");
+ }
+
+ /// Builds the syntax node for the user-defined literal \p S: marks the
+ /// token at the literal's begin location as the `LiteralToken` child, then
+ /// folds the expression range into a node whose kind is chosen by
+ /// `getUserDefinedLiteralKind`. Always returns true.
+ bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) {
+ Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);
+ // NOTE(review): this allocates the base class
+ // `UserDefinedLiteralExpression` tagged with a derived kind, not the
+ // concrete `final` subclass for that kind. The subclasses' `classof`
+ // checks look only at the kind, so a cast to e.g.
+ // `IntegerUserDefinedLiteralExpression` would succeed on an object that
+ // was not constructed as one. Consider allocating the concrete class per
+ // kind instead.
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::UserDefinedLiteralExpression(
+ getUserDefinedLiteralKind(S)),
+ S);
+ return true;
+ }
+
bool WalkUpFromDeclRefExpr(DeclRefExpr *S) {
if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc()))
Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier);
@@ -817,9 +854,9 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) {
if (getOperatorNodeKind(*S) ==
syntax::NodeKind::PostfixUnaryOperatorExpression) {
- // A postfix unary operator is declared as taking two operands. The second
- // operand is used to distinguish from its prefix counterpart. In the
- // semantic AST this "phantom" operand is represented as a
+ // A postfix unary operator is declared as taking two operands. The
+ // second operand is used to distinguish from its prefix counterpart. In
+ // the semantic AST this "phantom" operand is represented as a
// `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this
// operand because it does not correspond to anything written in source
// code
diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 3d9b943d6db1..e1aa2521a2a9 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -32,6 +32,16 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "BoolLiteralExpression";
case NodeKind::CxxNullPtrExpression:
return OS << "CxxNullPtrExpression";
+ case NodeKind::UnknownUserDefinedLiteralExpression:
+ return OS << "UnknownUserDefinedLiteralExpression";
+ case NodeKind::IntegerUserDefinedLiteralExpression:
+ return OS << "IntegerUserDefinedLiteralExpression";
+ case NodeKind::FloatUserDefinedLiteralExpression:
+ return OS << "FloatUserDefinedLiteralExpression";
+ case NodeKind::CharUserDefinedLiteralExpression:
+ return OS << "CharUserDefinedLiteralExpression";
+ case NodeKind::StringUserDefinedLiteralExpression:
+ return OS << "StringUserDefinedLiteralExpression";
case NodeKind::PrefixUnaryOperatorExpression:
return OS << "PrefixUnaryOperatorExpression";
case NodeKind::PostfixUnaryOperatorExpression:
@@ -252,6 +262,11 @@ syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() {
findChild(syntax::NodeRole::LiteralToken));
}
+/// Looks up the child with role `NodeRole::LiteralToken` and returns it as a
+/// `syntax::Leaf`. `cast_or_null` is used, so a null lookup result is passed
+/// through as null.
+syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
syntax::Expression *syntax::BinaryOperatorExpression::lhs() {
return llvm::cast_or_null<syntax::Expression>(
findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide));
diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index acd0fbf2b52e..91e7a8f33e4e 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -1184,20 +1184,108 @@ void test() {
)txt"));
}
-TEST_P(SyntaxTreeTest, IntegerLiteral) {
+TEST_P(SyntaxTreeTest, UserDefinedLiteral) {
+ if (!GetParam().isCXX11OrLater()) {
+ return;
+ }
EXPECT_TRUE(treeDumpEqual(
R"cpp(
+unsigned operator "" _i(unsigned long long);
+unsigned operator "" _f(long double);
+unsigned operator "" _c(char);
+
+unsigned operator "" _r(const char*); // raw-literal operator
+
+template <char...>
+unsigned operator "" _t(); // numeric literal operator template
+
void test() {
- 12;
- 12u;
- 12l;
- 12ul;
- 014;
- 0XC;
+ 12_i; // call: operator "" _i(12uLL) | kind: integer
+ 1.2_f; // call: operator "" _f(1.2L) | kind: float
+ '2'_c; // call: operator "" _c('2') | kind: char
+
+ // TODO: Generate `FloatUserDefinedLiteralExpression` and
+ // `IntegerUserDefinedLiteralExpression` instead of
+ // `UnknownUserDefinedLiteralExpression`. See `getUserDefinedLiteralKind`
+ 12_r; // call: operator "" _r("12") | kind: integer
+ 1.2_r; // call: operator "" _r("1.2") | kind: float
+ 12_t; // call: operator<'1', '2'> "" _t() | kind: integer
+ 1.2_t; // call: operator<'1', '.', '2'> "" _t() | kind: float
}
-)cpp",
+ )cpp",
R"txt(
*: TranslationUnit
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_i
+| | `-ParametersAndQualifiers
+| | |-(
+| | |-SimpleDeclaration
+| | | |-unsigned
+| | | |-long
+| | | `-long
+| | `-)
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_f
+| | `-ParametersAndQualifiers
+| | |-(
+| | |-SimpleDeclaration
+| | | |-long
+| | | `-double
+| | `-)
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_c
+| | `-ParametersAndQualifiers
+| | |-(
+| | |-SimpleDeclaration
+| | | `-char
+| | `-)
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_r
+| | `-ParametersAndQualifiers
+| | |-(
+| | |-SimpleDeclaration
+| | | |-const
+| | | |-char
+| | | `-SimpleDeclarator
+| | | `-*
+| | `-)
+| `-;
+|-TemplateDeclaration
+| |-template
+| |-<
+| |-SimpleDeclaration
+| | `-char
+| |-...
+| |->
+| `-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_t
+| | `-ParametersAndQualifiers
+| | |-(
+| | `-)
+| `-;
`-SimpleDeclaration
|-void
|-SimpleDeclarator
@@ -1208,28 +1296,95 @@ void test() {
`-CompoundStatement
|-{
|-ExpressionStatement
- | |-IntegerLiteralExpression
- | | `-12
+ | |-IntegerUserDefinedLiteralExpression
+ | | `-12_i
| `-;
|-ExpressionStatement
- | |-IntegerLiteralExpression
- | | `-12u
+ | |-FloatUserDefinedLiteralExpression
+ | | `-1.2_f
| `-;
|-ExpressionStatement
- | |-IntegerLiteralExpression
- | | `-12l
+ | |-CharUserDefinedLiteralExpression
+ | | `-'2'_c
| `-;
|-ExpressionStatement
- | |-IntegerLiteralExpression
- | | `-12ul
+ | |-UnknownUserDefinedLiteralExpression
+ | | `-12_r
| `-;
|-ExpressionStatement
- | |-IntegerLiteralExpression
- | | `-014
+ | |-UnknownUserDefinedLiteralExpression
+ | | `-1.2_r
| `-;
|-ExpressionStatement
- | |-IntegerLiteralExpression
- | | `-0XC
+ | |-UnknownUserDefinedLiteralExpression
+ | | `-12_t
+ | `-;
+ |-ExpressionStatement
+ | |-UnknownUserDefinedLiteralExpression
+ | | `-1.2_t
+ | `-;
+ `-}
+)txt"));
+}
+
+TEST_P(SyntaxTreeTest, UserDefinedLiteralString) {
+ if (!GetParam().isCXX11OrLater()) {
+ return;
+ }
+ EXPECT_TRUE(treeDumpEqual(
+ R"cpp(
+typedef decltype(sizeof(void *)) size_t;
+unsigned operator "" _s(const char*, size_t);
+void test() {
+ "12"_s;// call: operator "" _s("12", 2) | kind: string
+}
+ )cpp",
+ R"txt(
+*: TranslationUnit
+|-SimpleDeclaration
+| |-typedef
+| |-decltype
+| |-(
+| |-UnknownExpression
+| | |-sizeof
+| | |-(
+| | |-void
+| | |-*
+| | `-)
+| |-)
+| |-SimpleDeclarator
+| | `-size_t
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_s
+| | `-ParametersAndQualifiers
+| | |-(
+| | |-SimpleDeclaration
+| | | |-const
+| | | |-char
+| | | `-SimpleDeclarator
+| | | `-*
+| | |-,
+| | |-SimpleDeclaration
+| | | `-size_t
+| | `-)
+| `-;
+`-SimpleDeclaration
+ |-void
+ |-SimpleDeclarator
+ | |-test
+ | `-ParametersAndQualifiers
+ | |-(
+ | `-)
+ `-CompoundStatement
+ |-{
+ |-ExpressionStatement
+ | |-StringUserDefinedLiteralExpression
+ | | `-"12"_s
| `-;
`-}
)txt"));
More information about the cfe-commits
mailing list